1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-11-07 06:33:40 +01:00

Removing dgeam

This commit is contained in:
Anthony Scemama 2023-07-20 18:17:46 +02:00
parent 0858cb290f
commit cf980f0fae

View File

@ -303,6 +303,34 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
B = d_Y_oovv; ldb = nO*nO; B = d_Y_oovv; ldb = nO*nO;
C = d_r2; ldc = nO*nO; C = d_r2; ldc = nO*nO;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc); cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
double * Y_oovv = malloc(nO*nO*nV*nV*sizeof(double));
lda=nO*nO;
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_Y_oovv, lda, Y_oovv, lda);
cudaFree(d_Y_oovv);
double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
lda=nO*nO;
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
for (int j=0 ; j<nV ; ++j) {
for (int i=0 ; i<nV ; ++i) {
double * xx = &(r2_tmp[nO*nO*(i+nV*j)]);
const double * yy = &(Y_oovv[nO*nO*(j+nV*i)]);
for (int k=0 ; k<nO ; ++k) {
for (int l=0 ; l<nO ; ++l) {
xx[l + k*nO] += yy[k + l*nO];
}
}
}
}
free(Y_oovv);
lda=nO*nO;
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2_tmp, lda, d_r2, lda);
free(r2_tmp);
/*
for (int j=0 ; j<nV ; ++j) { for (int j=0 ; j<nV ; ++j) {
for (int i=0 ; i<nV ; ++i) { for (int i=0 ; i<nV ; ++i) {
alpha = 1.0; alpha = 1.0;
@ -315,6 +343,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
} }
} }
cudaFree(d_Y_oovv); cudaFree(d_Y_oovv);
*/
} }
// g_occ // g_occ
@ -352,6 +381,34 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
B = d_X_oovv; ldb = nO*nO; B = d_X_oovv; ldb = nO*nO;
C = d_r2; ldc = nO*nO; C = d_r2; ldc = nO*nO;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc); cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO*nO, nV*nV, &alpha, A, lda, &beta, B, ldb, C, ldc);
double * X_oovv = malloc(nO*nO*nV*nV*sizeof(double));
lda=nO*nO;
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_X_oovv, lda, X_oovv, lda);
cudaFree(d_X_oovv);
double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
lda=nO*nO;
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
for (int j=0 ; j<nV ; ++j) {
for (int i=0 ; i<nV ; ++i) {
double * xx = &(r2_tmp[nO*nO*(i+nV*j)]);
const double * yy = &(X_oovv[nO*nO*(j+nV*i)]);
for (int k=0 ; k<nO ; ++k) {
for (int l=0 ; l<nO ; ++l) {
xx[l + k*nO] -= yy[k + l*nO];
}
}
}
}
free(X_oovv);
lda=nO*nO;
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2_tmp, lda, d_r2, lda);
free(r2_tmp);
/*
for (int j=0 ; j<nV ; ++j) { for (int j=0 ; j<nV ; ++j) {
for (int i=0 ; i<nV ; ++i) { for (int i=0 ; i<nV ; ++i) {
alpha = 1.0; alpha = 1.0;
@ -363,6 +420,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
} }
} }
cudaFree(d_X_oovv); cudaFree(d_X_oovv);
*/
} }
#pragma omp section #pragma omp section