mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-12-22 04:13:40 +01:00
Preparing files
This commit is contained in:
parent
2df6c19772
commit
c45db49df5
@ -470,46 +470,46 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
|
||||
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
|
||||
|
||||
! call dgemm('N','N', nO, nO*nO*nO, nV, &
|
||||
! 1d0, t1 , size(t1,1), &
|
||||
! cc_space_v_vooo, size(cc_space_v_vooo,1), &
|
||||
! 0d0, Y_oooo, size(Y_oooo,1))
|
||||
!
|
||||
! !$omp parallel &
|
||||
! !$omp private(u,v,i,j) &
|
||||
! !$omp default(shared)
|
||||
! !$omp do collapse(2)
|
||||
! do j = 1, nO
|
||||
! do i = 1, nO
|
||||
! do v = 1, nO
|
||||
! do u = 1, nO
|
||||
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
! !$omp end do
|
||||
! !$omp end parallel
|
||||
!
|
||||
! deallocate(Y_oooo)
|
||||
!
|
||||
! ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
|
||||
! call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
|
||||
! 1d0, tau , size(tau,1) * size(tau,2), &
|
||||
! cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
|
||||
! 1d0, A1 , size(A1,1) * size(A1,2))
|
||||
!
|
||||
! call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
|
||||
! 1d0, A1, size(A1,1) * size(A1,2), &
|
||||
! tau, size(tau,1) * size(tau,2), &
|
||||
! 0d0, r2, size(r2,1) * size(r2,2))
|
||||
!
|
||||
! deallocate(A1)
|
||||
call dgemm('N','N', nO, nO*nO*nO, nV, &
|
||||
1d0, t1 , size(t1,1), &
|
||||
cc_space_v_vooo, size(cc_space_v_vooo,1), &
|
||||
0d0, Y_oooo, size(Y_oooo,1))
|
||||
|
||||
!$omp parallel &
|
||||
!$omp private(u,v,i,j) &
|
||||
!$omp default(shared)
|
||||
!$omp do collapse(2)
|
||||
do j = 1, nO
|
||||
do i = 1, nO
|
||||
do v = 1, nO
|
||||
do u = 1, nO
|
||||
A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
|
||||
deallocate(Y_oooo)
|
||||
|
||||
! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
|
||||
call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
|
||||
1d0, tau , size(tau,1) * size(tau,2), &
|
||||
cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
|
||||
1d0, A1 , size(A1,1) * size(A1,2))
|
||||
|
||||
call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
|
||||
1d0, A1, size(A1,1) * size(A1,2), &
|
||||
tau, size(tau,1) * size(tau,2), &
|
||||
0d0, r2, size(r2,1) * size(r2,2))
|
||||
|
||||
deallocate(A1)
|
||||
|
||||
|
||||
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
|
||||
cc_space_v_vo_chol, cc_space_v_vv_chol, &
|
||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
|
||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
|
||||
r2)
|
||||
|
||||
double precision, allocatable :: X_oovv(:,:,:,:)
|
||||
|
@ -67,6 +67,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
double* cc_space_v_oooo,
|
||||
double* cc_space_v_vooo,
|
||||
double* cc_space_v_oovv,
|
||||
double* cc_space_v_vvoo,
|
||||
double* r2)
|
||||
{
|
||||
double* d_tau;
|
||||
@ -121,24 +122,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
|
||||
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
|
||||
|
||||
lda = cholesky_mo_num * nV;
|
||||
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
|
||||
|
||||
alpha=1.0; beta=0.0;
|
||||
m=cholesky_mo_num*nV; n=nV; k=nO;
|
||||
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
|
||||
|
||||
double* d_tmp_cc2;
|
||||
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
|
||||
|
||||
double* d_B1;
|
||||
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
|
||||
|
||||
double* d_tmpB1;
|
||||
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
|
||||
|
||||
#pragma sections
|
||||
#pragma omp sections
|
||||
{
|
||||
#pragma omp section
|
||||
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
|
||||
@ -146,6 +130,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
r2[i] += cc_space_v_oovv[i];
|
||||
}
|
||||
|
||||
/*
|
||||
#pragma omp section
|
||||
{
|
||||
double* d_cc_space_v_vooo;
|
||||
@ -159,9 +144,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
beta = 0.0;
|
||||
m=nO ; n=nO*nO*nO; k=nV;
|
||||
A = d_t1 ; lda = nO;
|
||||
B = d_cc_space_v_vooo ; ldb = nO;
|
||||
B = d_cc_space_v_vooo ; ldb = nV;
|
||||
C = d_Y_oooo; ldc = nO;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
cudaFree(d_cc_space_v_vooo);
|
||||
|
||||
double* d_A1;
|
||||
@ -181,9 +166,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
for (int i=0 ; i<nO ; ++i) {
|
||||
alpha = 1.0;
|
||||
beta = 1.0;
|
||||
A = d_A1[nO*nO*(i+nO*j)]; lda = nO;
|
||||
B = d_Y_oooo[nO*nO*(j+nO*i)]; ldb = nO;
|
||||
C = d_A1[nO*nO*(i+nO*j)]; ldc = nO;
|
||||
A = &(d_A1[nO*nO*(i+nO*j)]); lda = nO;
|
||||
B = &(d_Y_oooo[nO*nO*(j+nO*i)]); ldb = nO;
|
||||
C = &(d_A1[nO*nO*(i+nO*j)]); ldc = nO;
|
||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||
|
||||
}
|
||||
@ -200,8 +185,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
A = d_tau ; lda = nO*nO;
|
||||
B = d_cc_space_v_vvoo ; ldb = nV*nV;
|
||||
C = d_A1; ldc = nO*nO;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
||||
cudafree(d_cc_space_v_vvoo);
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
cudaFree(d_cc_space_v_vvoo);
|
||||
|
||||
alpha = 1.0;
|
||||
beta = 0.0;
|
||||
@ -209,12 +194,31 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
A = d_A1 ; lda = nO*nO;
|
||||
B = d_tau ; ldb = nO*nO;
|
||||
C = d_r2; ldc = nO*nO;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
||||
cudafree(A1);
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
cudaFree(d_A1);
|
||||
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
lda = cholesky_mo_num * nV;
|
||||
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
|
||||
|
||||
alpha=1.0; beta=0.0;
|
||||
m=cholesky_mo_num*nV; n=nV; k=nO;
|
||||
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
|
||||
|
||||
double* d_tmp_cc2;
|
||||
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
|
||||
|
||||
double* d_B1;
|
||||
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
|
||||
|
||||
double* d_tmpB1;
|
||||
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
|
||||
|
||||
#pragma omp for
|
||||
for (size_t gam=0 ; gam<nV ; ++gam)
|
||||
{
|
||||
|
@ -6,7 +6,7 @@ module gpu_module
|
||||
interface
|
||||
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,&
|
||||
cc_space_v_vo_chol,cc_space_v_vv_chol, &
|
||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
|
||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
|
||||
r2) bind(C)
|
||||
import c_int, c_double
|
||||
integer(c_int), value :: nO, nV, cholesky_mo_num
|
||||
@ -17,6 +17,7 @@ module gpu_module
|
||||
real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO)
|
||||
real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO)
|
||||
real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV)
|
||||
real(c_double), intent(in) :: cc_space_v_vvoo(nV,nV,nO,nO)
|
||||
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
|
||||
end subroutine
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user