mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-11-07 06:33:40 +01:00
Removing dgeam
This commit is contained in:
parent
0858cb290f
commit
cf980f0fae
@@ -303,6 +303,34 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
B = d_Y_oovv; ldb = nO*nO;
|
||||
C = d_r2; ldc = nO*nO;
|
||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||
|
||||
double * Y_oovv = malloc(nO*nO*nV*nV*sizeof(double));
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_Y_oovv, lda, Y_oovv, lda);
|
||||
cudaFree(d_Y_oovv);
|
||||
|
||||
double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
|
||||
|
||||
for (int j=0 ; j<nV ; ++j) {
|
||||
for (int i=0 ; i<nV ; ++i) {
|
||||
double * xx = &(r2_tmp[nO*nO*(i+nV*j)]);
|
||||
const double * yy = &(Y_oovv[nO*nO*(j+nV*i)]);
|
||||
for (int k=0 ; k<nO ; ++k) {
|
||||
for (int l=0 ; l<nO ; ++l) {
|
||||
xx[l + k*nO] += yy[k + l*nO];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(Y_oovv);
|
||||
|
||||
lda=nO*nO;
|
||||
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2_tmp, lda, d_r2, lda);
|
||||
free(r2_tmp);
|
||||
|
||||
/*
|
||||
for (int j=0 ; j<nV ; ++j) {
|
||||
for (int i=0 ; i<nV ; ++i) {
|
||||
alpha = 1.0;
|
||||
@@ -315,6 +343,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
}
|
||||
}
|
||||
cudaFree(d_Y_oovv);
|
||||
*/
|
||||
}
|
||||
|
||||
// g_occ
|
||||
@@ -352,6 +381,34 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
B = d_X_oovv; ldb = nO*nO;
|
||||
C = d_r2; ldc = nO*nO;
|
||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||
|
||||
double * X_oovv = malloc(nO*nO*nV*nV*sizeof(double));
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_X_oovv, lda, X_oovv, lda);
|
||||
cudaFree(d_X_oovv);
|
||||
|
||||
double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
|
||||
|
||||
for (int j=0 ; j<nV ; ++j) {
|
||||
for (int i=0 ; i<nV ; ++i) {
|
||||
double * xx = &(r2_tmp[nO*nO*(i+nV*j)]);
|
||||
const double * yy = &(X_oovv[nO*nO*(j+nV*i)]);
|
||||
for (int k=0 ; k<nO ; ++k) {
|
||||
for (int l=0 ; l<nO ; ++l) {
|
||||
xx[l + k*nO] -= yy[k + l*nO];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(X_oovv);
|
||||
|
||||
lda=nO*nO;
|
||||
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2_tmp, lda, d_r2, lda);
|
||||
free(r2_tmp);
|
||||
|
||||
/*
|
||||
for (int j=0 ; j<nV ; ++j) {
|
||||
for (int i=0 ; i<nV ; ++i) {
|
||||
alpha = 1.0;
|
||||
@@ -363,6 +420,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
}
|
||||
}
|
||||
cudaFree(d_X_oovv);
|
||||
*/
|
||||
}
|
||||
|
||||
#pragma omp section
|
||||
|
Loading…
Reference in New Issue
Block a user