mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-11-07 06:33:40 +01:00
Preparing files
This commit is contained in:
parent
2df6c19772
commit
c45db49df5
@ -470,46 +470,46 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
|||||||
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
|
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j)
|
||||||
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
|
! A1(u,v,i,j) += cc_space_v_ovoo(u,a,i,j) * t1(v,a) &
|
||||||
|
|
||||||
! call dgemm('N','N', nO, nO*nO*nO, nV, &
|
call dgemm('N','N', nO, nO*nO*nO, nV, &
|
||||||
! 1d0, t1 , size(t1,1), &
|
1d0, t1 , size(t1,1), &
|
||||||
! cc_space_v_vooo, size(cc_space_v_vooo,1), &
|
cc_space_v_vooo, size(cc_space_v_vooo,1), &
|
||||||
! 0d0, Y_oooo, size(Y_oooo,1))
|
0d0, Y_oooo, size(Y_oooo,1))
|
||||||
!
|
|
||||||
! !$omp parallel &
|
!$omp parallel &
|
||||||
! !$omp private(u,v,i,j) &
|
!$omp private(u,v,i,j) &
|
||||||
! !$omp default(shared)
|
!$omp default(shared)
|
||||||
! !$omp do collapse(2)
|
!$omp do collapse(2)
|
||||||
! do j = 1, nO
|
do j = 1, nO
|
||||||
! do i = 1, nO
|
do i = 1, nO
|
||||||
! do v = 1, nO
|
do v = 1, nO
|
||||||
! do u = 1, nO
|
do u = 1, nO
|
||||||
! A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
|
A1(u,v,i,j) = cc_space_v_oooo(u,v,i,j) + Y_oooo(v,u,j,i) + Y_oooo(u,v,i,j)
|
||||||
! enddo
|
enddo
|
||||||
! enddo
|
enddo
|
||||||
! enddo
|
enddo
|
||||||
! enddo
|
enddo
|
||||||
! !$omp end do
|
!$omp end do
|
||||||
! !$omp end parallel
|
!$omp end parallel
|
||||||
!
|
|
||||||
! deallocate(Y_oooo)
|
deallocate(Y_oooo)
|
||||||
!
|
|
||||||
! ! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
|
! A1(u,v,i,j) += cc_space_v_vvoo(a,b,i,j) * tau(u,v,a,b)
|
||||||
! call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
|
call dgemm('N','N', nO*nO, nO*nO, nV*nV, &
|
||||||
! 1d0, tau , size(tau,1) * size(tau,2), &
|
1d0, tau , size(tau,1) * size(tau,2), &
|
||||||
! cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
|
cc_space_v_vvoo, size(cc_space_v_vvoo,1) * size(cc_space_v_vvoo,2), &
|
||||||
! 1d0, A1 , size(A1,1) * size(A1,2))
|
1d0, A1 , size(A1,1) * size(A1,2))
|
||||||
!
|
|
||||||
! call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
|
call dgemm('N','N',nO*nO,nV*nV,nO*nO, &
|
||||||
! 1d0, A1, size(A1,1) * size(A1,2), &
|
1d0, A1, size(A1,1) * size(A1,2), &
|
||||||
! tau, size(tau,1) * size(tau,2), &
|
tau, size(tau,1) * size(tau,2), &
|
||||||
! 0d0, r2, size(r2,1) * size(r2,2))
|
0d0, r2, size(r2,1) * size(r2,2))
|
||||||
!
|
|
||||||
! deallocate(A1)
|
deallocate(A1)
|
||||||
|
|
||||||
|
|
||||||
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
|
call compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num,t1,tau, &
|
||||||
cc_space_v_vo_chol, cc_space_v_vv_chol, &
|
cc_space_v_vo_chol, cc_space_v_vv_chol, &
|
||||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
|
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
|
||||||
r2)
|
r2)
|
||||||
|
|
||||||
double precision, allocatable :: X_oovv(:,:,:,:)
|
double precision, allocatable :: X_oovv(:,:,:,:)
|
||||||
|
@ -67,6 +67,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
double* cc_space_v_oooo,
|
double* cc_space_v_oooo,
|
||||||
double* cc_space_v_vooo,
|
double* cc_space_v_vooo,
|
||||||
double* cc_space_v_oovv,
|
double* cc_space_v_oovv,
|
||||||
|
double* cc_space_v_vvoo,
|
||||||
double* r2)
|
double* r2)
|
||||||
{
|
{
|
||||||
double* d_tau;
|
double* d_tau;
|
||||||
@ -121,24 +122,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
|
cudaMalloc((void **)&d_t1, nO * nV * sizeof(double));
|
||||||
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
|
cublasSetMatrix(nO, nV, sizeof(double), t1, lda, d_t1, lda);
|
||||||
|
|
||||||
lda = cholesky_mo_num * nV;
|
#pragma omp sections
|
||||||
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
|
|
||||||
|
|
||||||
alpha=1.0; beta=0.0;
|
|
||||||
m=cholesky_mo_num*nV; n=nV; k=nO;
|
|
||||||
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
|
|
||||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
|
|
||||||
|
|
||||||
double* d_tmp_cc2;
|
|
||||||
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
|
|
||||||
|
|
||||||
double* d_B1;
|
|
||||||
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
|
|
||||||
|
|
||||||
double* d_tmpB1;
|
|
||||||
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
|
|
||||||
|
|
||||||
#pragma sections
|
|
||||||
{
|
{
|
||||||
#pragma omp section
|
#pragma omp section
|
||||||
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
|
for (size_t i=0 ; i<nO*nO*nV*nV ; ++i)
|
||||||
@ -146,6 +130,7 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
r2[i] += cc_space_v_oovv[i];
|
r2[i] += cc_space_v_oovv[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
#pragma omp section
|
#pragma omp section
|
||||||
{
|
{
|
||||||
double* d_cc_space_v_vooo;
|
double* d_cc_space_v_vooo;
|
||||||
@ -159,9 +144,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
beta = 0.0;
|
beta = 0.0;
|
||||||
m=nO ; n=nO*nO*nO; k=nV;
|
m=nO ; n=nO*nO*nO; k=nV;
|
||||||
A = d_t1 ; lda = nO;
|
A = d_t1 ; lda = nO;
|
||||||
B = d_cc_space_v_vooo ; ldb = nO;
|
B = d_cc_space_v_vooo ; ldb = nV;
|
||||||
C = d_Y_oooo; ldc = nO;
|
C = d_Y_oooo; ldc = nO;
|
||||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||||
cudaFree(d_cc_space_v_vooo);
|
cudaFree(d_cc_space_v_vooo);
|
||||||
|
|
||||||
double* d_A1;
|
double* d_A1;
|
||||||
@ -181,9 +166,9 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
for (int i=0 ; i<nO ; ++i) {
|
for (int i=0 ; i<nO ; ++i) {
|
||||||
alpha = 1.0;
|
alpha = 1.0;
|
||||||
beta = 1.0;
|
beta = 1.0;
|
||||||
A = d_A1[nO*nO*(i+nO*j)]; lda = nO;
|
A = &(d_A1[nO*nO*(i+nO*j)]); lda = nO;
|
||||||
B = d_Y_oooo[nO*nO*(j+nO*i)]; ldb = nO;
|
B = &(d_Y_oooo[nO*nO*(j+nO*i)]); ldb = nO;
|
||||||
C = d_A1[nO*nO*(i+nO*j)]; ldc = nO;
|
C = &(d_A1[nO*nO*(i+nO*j)]); ldc = nO;
|
||||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -200,8 +185,8 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
A = d_tau ; lda = nO*nO;
|
A = d_tau ; lda = nO*nO;
|
||||||
B = d_cc_space_v_vvoo ; ldb = nV*nV;
|
B = d_cc_space_v_vvoo ; ldb = nV*nV;
|
||||||
C = d_A1; ldc = nO*nO;
|
C = d_A1; ldc = nO*nO;
|
||||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||||
cudafree(d_cc_space_v_vvoo);
|
cudaFree(d_cc_space_v_vvoo);
|
||||||
|
|
||||||
alpha = 1.0;
|
alpha = 1.0;
|
||||||
beta = 0.0;
|
beta = 0.0;
|
||||||
@ -209,12 +194,31 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
|||||||
A = d_A1 ; lda = nO*nO;
|
A = d_A1 ; lda = nO*nO;
|
||||||
B = d_tau ; ldb = nO*nO;
|
B = d_tau ; ldb = nO*nO;
|
||||||
C = d_r2; ldc = nO*nO;
|
C = d_r2; ldc = nO*nO;
|
||||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, lda, &beta, C, ldc);
|
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||||
cudafree(A1);
|
cudaFree(d_A1);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
lda = cholesky_mo_num * nV;
|
||||||
|
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
|
||||||
|
|
||||||
|
alpha=1.0; beta=0.0;
|
||||||
|
m=cholesky_mo_num*nV; n=nV; k=nO;
|
||||||
|
A = d_cc_space_v_vo_chol; B = d_t1; C = d_tmp_cc;
|
||||||
|
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, m, B, k, &beta, C, m);
|
||||||
|
|
||||||
|
double* d_tmp_cc2;
|
||||||
|
cudaMalloc((void **)&d_tmp_cc2, cholesky_mo_num*nV*sizeof(double));
|
||||||
|
|
||||||
|
double* d_B1;
|
||||||
|
cudaMalloc((void**)&d_B1, nV*nV*BLOCK_SIZE*sizeof(double));
|
||||||
|
|
||||||
|
double* d_tmpB1;
|
||||||
|
cudaMalloc((void**)&d_tmpB1, nV*BLOCK_SIZE*nV*sizeof(double));
|
||||||
|
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (size_t gam=0 ; gam<nV ; ++gam)
|
for (size_t gam=0 ; gam<nV ; ++gam)
|
||||||
{
|
{
|
||||||
|
@ -6,7 +6,7 @@ module gpu_module
|
|||||||
interface
|
interface
|
||||||
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,&
|
subroutine compute_r2_space_chol_gpu(nO,nV,cholesky_mo_num, t1,tau,&
|
||||||
cc_space_v_vo_chol,cc_space_v_vv_chol, &
|
cc_space_v_vo_chol,cc_space_v_vv_chol, &
|
||||||
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, &
|
cc_space_v_oooo, cc_space_v_vooo, cc_space_v_oovv, cc_space_v_vvoo, &
|
||||||
r2) bind(C)
|
r2) bind(C)
|
||||||
import c_int, c_double
|
import c_int, c_double
|
||||||
integer(c_int), value :: nO, nV, cholesky_mo_num
|
integer(c_int), value :: nO, nV, cholesky_mo_num
|
||||||
@ -17,6 +17,7 @@ module gpu_module
|
|||||||
real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO)
|
real(c_double), intent(in) :: cc_space_v_oooo(nO,nO,nO,nO)
|
||||||
real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO)
|
real(c_double), intent(in) :: cc_space_v_vooo(nV,nO,nO,nO)
|
||||||
real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV)
|
real(c_double), intent(in) :: cc_space_v_oovv(nO,nO,nV,nV)
|
||||||
|
real(c_double), intent(in) :: cc_space_v_vvoo(nV,nV,nO,nO)
|
||||||
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
|
real(c_double), intent(out) :: r2(nO,nO,nV,nV)
|
||||||
end subroutine
|
end subroutine
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user