mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-11-07 06:33:40 +01:00
Added gpu_dgemm
This commit is contained in:
parent
0c6e6a1ca0
commit
5cec1b8a0c
@ -11,6 +11,53 @@ void dgemm_(char*, char*, int*, int*, int*, double*, double*, int*, double*, int
|
||||
|
||||
|
||||
|
||||
|
||||
void gpu_dgemm(char transa, char transb, int m, int n, int k, double alpha,
|
||||
double* A, int lda, double* B, int ldb, double beta, double* C, int ldc)
|
||||
{
|
||||
cublasHandle_t handle;
|
||||
cublasCreate(&handle);
|
||||
|
||||
double * d_A;
|
||||
double * d_B;
|
||||
double * d_C;
|
||||
cublasOperation_t ta, tb;
|
||||
|
||||
if (transa == 'N') {
|
||||
cudaMalloc((void**)&d_A, lda*k*sizeof(double));
|
||||
cublasSetMatrix(m, k, sizeof(double), A, lda, d_A, lda);
|
||||
ta = CUBLAS_OP_N;
|
||||
} else {
|
||||
cudaMalloc((void**)&d_A, lda*m*sizeof(double));
|
||||
cublasSetMatrix(k, m, sizeof(double), A, lda, d_A, lda);
|
||||
ta = CUBLAS_OP_T;
|
||||
}
|
||||
|
||||
if (transb == 'N') {
|
||||
cudaMalloc((void**)&d_B, ldb*n*sizeof(double));
|
||||
cublasSetMatrix(k, n, sizeof(double), B, ldb, d_B, ldb);
|
||||
tb = CUBLAS_OP_N;
|
||||
} else {
|
||||
cudaMalloc((void**)&d_B, ldb*k*sizeof(double));
|
||||
cublasSetMatrix(n, k, sizeof(double), B, ldb, d_B, ldb);
|
||||
tb = CUBLAS_OP_T;
|
||||
}
|
||||
|
||||
cudaMalloc((void**)&d_C, ldc*n*sizeof(double));
|
||||
if (beta != 0.) {
|
||||
cublasSetMatrix(m, n, sizeof(double), C, ldc, d_C, ldc);
|
||||
}
|
||||
|
||||
cublasDgemm(handle, ta, tb, m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc);
|
||||
|
||||
cublasGetMatrix(m, n, sizeof(double), d_C, ldc, C, ldc);
|
||||
|
||||
cudaFree(d_A);
|
||||
cudaFree(d_B);
|
||||
cudaFree(d_C);
|
||||
cublasDestroy(handle);
|
||||
}
|
||||
|
||||
void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo_num,
|
||||
double* t1,
|
||||
double* tau,
|
||||
@ -35,7 +82,6 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
double* d_r2;
|
||||
lda = nO * nO;
|
||||
cudaMalloc((void **)&d_r2, lda * nV * nV * sizeof(double));
|
||||
cublasSetMatrix(nO*nO, nV*nV, sizeof(double), r2, lda, d_r2, lda);
|
||||
|
||||
double* d_cc_space_v_vv_chol;
|
||||
lda = cholesky_mo_num * nV;
|
||||
@ -60,9 +106,13 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
m=cholesky_mo_num*nV; n=nV; k=nO;
|
||||
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
|
||||
cublasDestroy(handle);
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
cublasHandle_t handle;
|
||||
cublasCreate(&handle);
|
||||
|
||||
double* d_tmp_cc2;
|
||||
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
|
||||
|
||||
@ -126,42 +176,26 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
|
||||
}
|
||||
}
|
||||
cublasDestroy(handle);
|
||||
}
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2, lda);
|
||||
|
||||
cudaFree(d_cc_space_v_vo_chol);
|
||||
cudaFree(d_cc_space_v_vv_chol);
|
||||
cudaFree(d_tau);
|
||||
cudaFree(d_t1);
|
||||
cudaFree(d_tmp_cc);
|
||||
cudaFree(d_r2);
|
||||
cublasDestroy(handle);
|
||||
|
||||
double * r2_tmp = malloc(nO*nO*nV*nV*sizeof(double));
|
||||
lda=nO*nO;
|
||||
cublasGetMatrix(nO*nO, nV*nV, sizeof(double), d_r2, lda, r2_tmp, lda);
|
||||
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i) {
|
||||
r2[i] += r2_tmp[i];
|
||||
}
|
||||
free(r2_tmp);
|
||||
|
||||
cudaFree(d_r2);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
cublasHandle_t handle;
|
||||
cublasCreate(&handle);
|
||||
|
||||
double *d_A, *d_B, *d_C;
|
||||
cudaMalloc((void **)&d_A, m * k * sizeof(double));
|
||||
cudaMalloc((void **)&d_B, k * n * sizeof(double));
|
||||
cudaMalloc((void **)&d_C, m * n * sizeof(double));
|
||||
|
||||
cublasSetMatrix(m, k, sizeof(double), A, m, d_A, m);
|
||||
cublasSetMatrix(k, n, sizeof(double), B, k, d_B, k);
|
||||
cublasSetMatrix(m, n, sizeof(double), C, m, d_C, m);
|
||||
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, d_A, m, d_B, k, &beta, d_C, m);
|
||||
|
||||
cublasGetMatrix(m, n, sizeof(double), d_C, m, C, m);
|
||||
|
||||
cudaFree(d_A);
|
||||
cudaFree(d_B);
|
||||
cudaFree(d_C);
|
||||
cublasDestroy(handle);
|
||||
|
||||
*/
|
||||
|
@ -15,37 +15,13 @@ module gpu_module
|
||||
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
|
||||
end subroutine
|
||||
|
||||
subroutine gemm0(nO, nV, cholesky_mo_num, cc_space_v_vo_chol, t1, tmp_cc) bind(C, name="gemm0")
|
||||
import c_int, c_double
|
||||
integer(c_int), value :: nO, nV, cholesky_mo_num
|
||||
real(c_double) :: cc_space_v_vo_chol(cholesky_mo_num,nV,nO)
|
||||
real(c_double) :: t1(nO,nV)
|
||||
real(c_double) :: tmp_cc(cholesky_mo_num,nV,nV)
|
||||
end subroutine gemm0
|
||||
|
||||
subroutine gemm1(iblock, nV, cholesky_mo_num, tmp_cc, cc_space_v_vv_chol_, tmpB1) bind(C, name="gemm1")
|
||||
import c_int, c_double
|
||||
integer(c_int), value :: iblock, nV, cholesky_mo_num
|
||||
real(c_double) :: tmp_cc(cholesky_mo_num,nV,nV)
|
||||
real(c_double) :: cc_space_v_vv_chol_(cholesky_mo_num,nV)
|
||||
real(c_double) :: tmpB1(nV,16,nV)
|
||||
end subroutine gemm1
|
||||
|
||||
subroutine gemm2(iblock, nV, cholesky_mo_num, tmp_cc2, cc_space_v_vv_chol, tmpB1) bind(C, name="gemm2")
|
||||
import c_int, c_double
|
||||
integer(c_int), value :: iblock, nV, cholesky_mo_num
|
||||
real(c_double) :: tmp_cc2(cholesky_mo_num,nV)
|
||||
real(c_double) :: cc_space_v_vv_chol(cholesky_mo_num,nV,nV)
|
||||
real(c_double) :: tmpB1(nV,16,nV)
|
||||
end subroutine gemm2
|
||||
|
||||
subroutine gemm3(iblock, nO, nV, gam, tau, B1, r2) bind(C, name="gemm3")
|
||||
import c_int, c_double
|
||||
integer(c_int), value :: iblock, nO, nV, gam
|
||||
real(c_double) :: tau(nO,nO,nV,nV)
|
||||
real(c_double) :: B1(nV,nV,*)
|
||||
real(c_double) :: r2(nO,nO,nV,nV)
|
||||
end subroutine gemm3
|
||||
subroutine gpu_dgemm(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc) bind(C)
|
||||
import c_int, c_double, c_char
|
||||
character(c_char), value :: transa, transb
|
||||
integer(c_int), value :: m,n,k,lda,ldb,ldc
|
||||
real(c_double), value :: alpha, beta
|
||||
real(c_double) :: A(lda,*), B(ldb,*), C(ldc,*)
|
||||
end subroutine
|
||||
|
||||
end interface
|
||||
|
||||
|
1679
devel/ccsd_gpu/mo_integrals_cc.irp.f
Normal file
1679
devel/ccsd_gpu/mo_integrals_cc.irp.f
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user