Preparing files

This commit is contained in:
Anthony Scemama 2023-07-16 21:18:57 +02:00
parent 2df6c19772
commit c45db49df5
3 changed files with 69 additions and 64 deletions

View File

@ -470,46 +470,46 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
! call dgemm('N','N', nO, nO*nO*nO, nV, &
! 1d0, t1 , size(t1,1), &
! cc_space_v_vooo, size(cc_space_v_vooo,1), &
! 0d0, Y_oooo, size(Y_oooo,1))
!
! !$omp parallel &
! !$omp private(u,v,i,j) &
! !$omp default(shared)
! !$omp do collapse(2)
! do j = 1, nO
! do i = 1, nO
! do v = 1, nO
! do u = 1, nO
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
! enddo
! enddo
! enddo
! enddo
! !$omp end do
! !$omp end parallel
!
! deallocate(Y_oooo)
!
! ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
! call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
! 1d0, tau , size(tau,1) * size(tau,2), &
! cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
! 1d0, A1 , size(A1,1) * size(A1,2))
!
! call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
! 1d0, A1, size(A1,1) * size(A1,2), &
! tau, size(tau,1) * size(tau,2), &
! 0d0, r2, size(r2,1) * size(r2,2))
!
! deallocate(A1)
call dgemm('N','N', nO, nO*nO*nO, nV, &
1d0, t1 , size(t1,1), &
cc_space_v_vooo, size(cc_space_v_vooo,1), &
0d0, Y_oooo, size(Y_oooo,1))
!$omp parallel &
!$omp private(u,v,i,j) &
!$omp default(shared)
!$omp do collapse(2)
do j = 1, nO
do i = 1, nO
do v = 1, nO
do u = 1, nO
A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
enddo
enddo
enddo
enddo
!$omp end do
!$omp end parallel
deallocate(Y_oooo)
! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
1d0, tau , size(tau,1) * size(tau,2), &
cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
1d0, A1 , size(A1,1) * size(A1,2))
call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
1d0, A1, size(A1,1) * size(A1,2), &
tau, size(tau,1) * size(tau,2), &
0d0, r2, size(r2,1) * size(r2,2))
deallocate(A1)
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
cc_space_v_vo_chol, cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
r2)
double precision, allocatable :: X_oovv(:,:,:,:)

View File

@ -67,6 +67,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
double* cc_space_v_oooo,
double* cc_space_v_vooo,
double* cc_space_v_oovv,
double* cc_space_v_vvoo,
double* r2)
{
double* d_tau;
@ -121,24 +122,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
alpha=1.0; beta=0.0;
m=cholesky_mo_num*nV; n=nV; k=nO;
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
double* d_tmp_cc2;
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
double* d_B1;
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
double* d_tmpB1;
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
#pragma sections
#pragma omp sections
{
#pragma omp section
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
@ -146,6 +130,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
r2[i] += cc_space_v_oovv[i];
}
/*
#pragma omp section
{
double* d_cc_space_v_vooo;
@ -159,9 +144,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
beta = 0.0;
m=nO ; n=nO*nO*nO; k=nV;
A = d_t1 ; lda = nO;
B = d_cc_space_v_vooo ; ldb = nO;
B = d_cc_space_v_vooo ; ldb = nV;
C = d_Y_oooo; ldc = nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudaFree(d_cc_space_v_vooo);
double* d_A1;
@ -181,9 +166,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
for (int i=0 ; i<nO ; ++i) {
alpha = 1.0;
beta = 1.0;
A = d_A1[nO*nO*(i+nO*j)]; lda = nO;
B = d_Y_oooo[nO*nO*(j+nO*i)]; ldb = nO;
C = d_A1[nO*nO*(i+nO*j)]; ldc = nO;
A = &(d_A1[nO*nO*(i+nO*j)]); lda = nO;
B = &(d_Y_oooo[nO*nO*(j+nO*i)]); ldb = nO;
C = &(d_A1[nO*nO*(i+nO*j)]); ldc = nO;
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
}
@ -200,8 +185,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
A = d_tau ; lda = nO*nO;
B = d_cc_space_v_vvoo ; ldb = nV*nV;
C = d_A1; ldc = nO*nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
cudafree(d_cc_space_v_vvoo);
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudaFree(d_cc_space_v_vvoo);
alpha = 1.0;
beta = 0.0;
@ -209,12 +194,31 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
A = d_A1 ; lda = nO*nO;
B = d_tau ; ldb = nO*nO;
C = d_r2; ldc = nO*nO;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
cudafree(A1);
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
cudaFree(d_A1);
}
*/
}
lda = cholesky_mo_num * nV;
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
alpha=1.0; beta=0.0;
m=cholesky_mo_num*nV; n=nV; k=nO;
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
double* d_tmp_cc2;
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
double* d_B1;
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
double* d_tmpB1;
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
#pragma omp for
for (size_t gam=0 ; gam<nV ; ++gam)
{

View File

@ -6,7 +6,7 @@ module gpu_module
interface
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,&
cc_space_v_vo_chol,cc_space_v_vv_chol, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
r2) bind(C)
import c_int, c_double
integer(c_int), value :: nO, nV, cholesky_mo_num
@ -17,6 +17,7 @@ module gpu_module
real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO)
real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO)
real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV)
real(c_double), intent(in) :: cc_space_v_vvoo(nV,nV,nO,nO)
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
end subroutine