1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-12-22 12:23:37 +01:00

Preparing files

This commit is contained in:
Anthony Scemama 2023-07-16 21:18:57 +02:00
parent 2df6c19772
commit c45db49df5
3 changed files with 69 additions and 64 deletions

View File

@ -470,46 +470,46 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) ! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) & ! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
! call dgemm('N','N', nO, nO*nO*nO, nV, & call dgemm('N','N', nO, nO*nO*nO, nV, &
! 1d0, t1 , size(t1,1), & 1d0, t1 , size(t1,1), &
! cc_space_v_vooo, size(cc_space_v_vooo,1), & cc_space_v_vooo, size(cc_space_v_vooo,1), &
! 0d0, Y_oooo, size(Y_oooo,1)) 0d0, Y_oooo, size(Y_oooo,1))
!
! !$omp parallel & !$omp parallel &
! !$omp private(u,v,i,j) & !$omp private(u,v,i,j) &
! !$omp default(shared) !$omp default(shared)
! !$omp do collapse(2) !$omp do collapse(2)
! do j = 1, nO do j = 1, nO
! do i = 1, nO do i = 1, nO
! do v = 1, nO do v = 1, nO
! do u = 1, nO do u = 1, nO
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j) A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
! enddo enddo
! enddo enddo
! enddo enddo
! enddo enddo
! !$omp end do !$omp end do
! !$omp end parallel !$omp end parallel
!
! deallocate(Y_oooo) deallocate(Y_oooo)
!
! ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b) ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
! call dgemm('N','N', nO*nO, nO*nO, nV*nV, & call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
! 1d0, tau , size(tau,1) * size(tau,2), & 1d0, tau , size(tau,1) * size(tau,2), &
! cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), & cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
! 1d0, A1 , size(A1,1) * size(A1,2)) 1d0, A1 , size(A1,1) * size(A1,2))
!
! call dgemm('N','N',nO*nO,nV*nV,nO*nO, & call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
! 1d0, A1, size(A1,1) * size(A1,2), & 1d0, A1, size(A1,1) * size(A1,2), &
! tau, size(tau,1) * size(tau,2), & tau, size(tau,1) * size(tau,2), &
! 0d0, r2, size(r2,1) * size(r2,2)) 0d0, r2, size(r2,1) * size(r2,2))
!
! deallocate(A1) deallocate(A1)
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, & call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
cc_space_v_vo_chol, cc_space_v_vv_chol, & cc_space_v_vo_chol, cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, & cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
r2) r2)
double precision, allocatable :: X_oovv(:,:,:,:) double precision, allocatable :: X_oovv(:,:,:,:)

View File

@ -67,6 +67,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
double* cc_space_v_oooo, double* cc_space_v_oooo,
double* cc_space_v_vooo, double* cc_space_v_vooo,
double* cc_space_v_oovv, double* cc_space_v_oovv,
double* cc_space_v_vvoo,
double* r2) double* r2)
{ {
double* d_tau; double* d_tau;
@ -121,24 +122,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double)); cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda); cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
lda = cholesky_mo_num * nV; #pragma omp sections
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
alpha=1.0; beta=0.0;
m=cholesky_mo_num*nV; n=nV; k=nO;
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
double* d_tmp_cc2;
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
double* d_B1;
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
double* d_tmpB1;
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
#pragma sections
{ {
#pragma omp section #pragma omp section
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i) for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
@ -146,6 +130,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
r2[i] += cc_space_v_oovv[i]; r2[i] += cc_space_v_oovv[i];
} }
/*
#pragma omp section #pragma omp section
{ {
double* d_cc_space_v_vooo; double* d_cc_space_v_vooo;
@ -159,9 +144,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
beta = 0.0; beta = 0.0;
m=nO ; n=nO*nO*nO; k=nV; m=nO ; n=nO*nO*nO; k=nV;
A = d_t1 ; lda = nO; A = d_t1 ; lda = nO;
B = d_cc_space_v_vooo ; ldb = nO; B = d_cc_space_v_vooo ; ldb = nV;
C = d_Y_oooo; ldc = nO; C = d_Y_oooo; ldc = nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc); cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudaFree(d_cc_space_v_vooo); cudaFree(d_cc_space_v_vooo);
double* d_A1; double* d_A1;
@ -181,9 +166,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
for (int i=0 ; i<nO ; ++i) { for (int i=0 ; i<nO ; ++i) {
alpha = 1.0; alpha = 1.0;
beta = 1.0; beta = 1.0;
A = d_A1[nO*nO*(i+nO*j)]; lda = nO; A = &(d_A1[nO*nO*(i+nO*j)]); lda = nO;
B = d_Y_oooo[nO*nO*(j+nO*i)]; ldb = nO; B = &(d_Y_oooo[nO*nO*(j+nO*i)]); ldb = nO;
C = d_A1[nO*nO*(i+nO*j)]; ldc = nO; C = &(d_A1[nO*nO*(i+nO*j)]); ldc = nO;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc); cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
} }
@ -200,8 +185,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
A = d_tau ; lda = nO*nO; A = d_tau ; lda = nO*nO;
B = d_cc_space_v_vvoo ; ldb = nV*nV; B = d_cc_space_v_vvoo ; ldb = nV*nV;
C = d_A1; ldc = nO*nO; C = d_A1; ldc = nO*nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc); cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudafree(d_cc_space_v_vvoo); cudaFree(d_cc_space_v_vvoo);
alpha = 1.0; alpha = 1.0;
beta = 0.0; beta = 0.0;
@ -209,12 +194,31 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
A = d_A1 ; lda = nO*nO; A = d_A1 ; lda = nO*nO;
B = d_tau ; ldb = nO*nO; B = d_tau ; ldb = nO*nO;
C = d_r2; ldc = nO*nO; C = d_r2; ldc = nO*nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc); cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudafree(A1); cudaFree(d_A1);
} }
*/
} }
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
alpha=1.0; beta=0.0;
m=cholesky_mo_num*nV; n=nV; k=nO;
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
double* d_tmp_cc2;
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
double* d_B1;
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
double* d_tmpB1;
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
#pragma omp for #pragma omp for
for (size_t gam=0 ; gam<nV ; ++gam) for (size_t gam=0 ; gam<nV ; ++gam)
{ {

View File

@ -6,7 +6,7 @@ module gpu_module
interface interface
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,& subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,&
cc_space_v_vo_chol,cc_space_v_vv_chol, & cc_space_v_vo_chol,cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, & cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
r2) bind(C) r2) bind(C)
import c_int, c_double import c_int, c_double
integer(c_int), value :: nO, nV, cholesky_mo_num integer(c_int), value :: nO, nV, cholesky_mo_num
@ -17,6 +17,7 @@ module gpu_module
real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO) real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO)
real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO) real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO)
real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV) real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV)
real(c_double), intent(in) :: cc_space_v_vvoo(nV,nV,nO,nO)
real(c_double), intent(out) :: r2(nO,nO,nV,nV) real(c_double), intent(out) :: r2(nO,nO,nV,nV)
end subroutine end subroutine