mirror of
https://gitlab.com/scemama/qp_plugins_scemama.git
synced 2024-12-21 20:03:37 +01:00
Added loop
This commit is contained in:
parent
eaa7160884
commit
79060ff6e1
@ -499,41 +499,41 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
double precision, allocatable :: X_oovv(:,:,:,:)
|
||||
double precision, allocatable :: X_vovv(:,:,:,:)
|
||||
|
||||
allocate(X_vovv(nV,nO,nV,block_size))
|
||||
allocate(Y_oovv(nO,nO,nV,nV))
|
||||
|
||||
do iblock = 1, nV, block_size
|
||||
do gam = iblock, min(nV, iblock+block_size-1)
|
||||
call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, &
|
||||
cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, &
|
||||
cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV)
|
||||
|
||||
enddo
|
||||
call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, &
|
||||
1d0, t1 , size(t1,1), &
|
||||
X_vovv, size(X_vovv,1), &
|
||||
0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1))
|
||||
|
||||
enddo
|
||||
deallocate(X_vovv)
|
||||
|
||||
!$omp parallel &
|
||||
!$omp shared(nO,nV,r2,Y_oovv) &
|
||||
!$omp private(u,v,gam,beta) &
|
||||
!$omp default(none)
|
||||
!$omp do
|
||||
do gam = 1, nV
|
||||
do beta = 1, nV
|
||||
do v = 1, nO
|
||||
do u = 1, nO
|
||||
r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!$omp end do
|
||||
!$omp end parallel
|
||||
deallocate(Y_oovv)
|
||||
! allocate(X_vovv(nV,nO,nV,block_size))
|
||||
! allocate(Y_oovv(nO,nO,nV,nV))
|
||||
!
|
||||
! do iblock = 1, nV, block_size
|
||||
! do gam = iblock, min(nV, iblock+block_size-1)
|
||||
! call dgemm('T','N',nV, nO*nV, cholesky_mo_num, 1.d0, &
|
||||
! cc_space_v_vv_chol(1,1,gam), cholesky_mo_num, cc_space_v_ov_chol, &
|
||||
! cholesky_mo_num, 0.d0, X_vovv(1,1,1,gam-iblock+1), nV)
|
||||
!
|
||||
! enddo
|
||||
! call dgemm('N','N',nO,nO*nV*min(block_size, nV-iblock+1),nV, &
|
||||
! 1d0, t1 , size(t1,1), &
|
||||
! X_vovv, size(X_vovv,1), &
|
||||
! 0d0, Y_oovv(1,1,1,iblock), size(Y_oovv,1))
|
||||
!
|
||||
! enddo
|
||||
! deallocate(X_vovv)
|
||||
!
|
||||
! !$omp parallel &
|
||||
! !$omp shared(nO,nV,r2,Y_oovv) &
|
||||
! !$omp private(u,v,gam,beta) &
|
||||
! !$omp default(none)
|
||||
! !$omp do
|
||||
! do gam = 1, nV
|
||||
! do beta = 1, nV
|
||||
! do v = 1, nO
|
||||
! do u = 1, nO
|
||||
! r2(u,v,beta,gam) = r2(u,v,beta,gam) + Y_oovv(v,u,beta,gam) + Y_oovv(u,v,gam,beta)
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
! enddo
|
||||
! !$omp end do
|
||||
! !$omp end parallel
|
||||
! deallocate(Y_oovv)
|
||||
|
||||
double precision, allocatable :: X_ovvo(:,:,:,:)
|
||||
double precision, allocatable :: tcc(:,:,:), tcc2(:,:,:)
|
||||
@ -884,7 +884,6 @@ subroutine compute_r2_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r2,max_r2)
|
||||
end
|
||||
|
||||
|
||||
! g_occ
|
||||
|
||||
|
||||
subroutine compute_J1_chol(nO,nV,t1,t2,v_ovvo,v_ovoo,v_vvoo,J1)
|
||||
|
@ -350,8 +350,58 @@ void compute_r2_space_chol_gpu(const int nO, const int nV, const int cholesky_mo
|
||||
cudaFree(d_X_oovv);
|
||||
}
|
||||
|
||||
#pragma omp section
|
||||
{
|
||||
double* d_X_vovv;
|
||||
lda = nV*nO;
|
||||
cudaMalloc((void **)&d_X_vovv, nV*nO*nV*BLOCK_SIZE * sizeof(double));
|
||||
|
||||
double* d_Y_oovv;
|
||||
lda = nO*nO;
|
||||
cudaMalloc((void **)&d_Y_oovv, nO*nO*nV*nV * sizeof(double));
|
||||
|
||||
for (size_t iblock=0 ; iblock<nV ; iblock += BLOCK_SIZE) {
|
||||
size_t mbs = nV < iblock+BLOCK_SIZE ? nV : iblock+BLOCK_SIZE;
|
||||
for (size_t gam=iblock ; gam<mbs ; ++gam) {
|
||||
alpha = 1.0;
|
||||
beta = 0.0;
|
||||
m=nV; n=nO*nV; k=cholesky_mo_num;
|
||||
A=&(d_cc_space_v_vv_chol[cholesky_mo_num*nV*gam]); lda=cholesky_mo_num;
|
||||
B=d_cc_space_v_ov_chol; ldb=cholesky_mo_num;
|
||||
C=&(d_X_vovv[nV*nO*nV*(gam-iblock)]); ldc=nV;
|
||||
cublasDgemm(handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
}
|
||||
mbs = BLOCK_SIZE < nV-iblock ? BLOCK_SIZE : nV-iblock;
|
||||
alpha = 1.0;
|
||||
beta = 0.0;
|
||||
m=nO; n=nO*nV*mbs; k=nV;
|
||||
A=d_t1; lda=nO;
|
||||
B=d_X_vovv; ldb=nV;
|
||||
C=&(d_Y_oovv[nO*nO*nV*iblock]); ldc=nO;
|
||||
cublasDgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, A, lda, B, ldb, &beta, C, ldc);
|
||||
}
|
||||
cudaFree(d_X_vovv);
|
||||
for (int j=0 ; j<nV ; ++j) {
|
||||
for (int i=0 ; i<nV ; ++i) {
|
||||
alpha = 1.0;
|
||||
beta = 1.0;
|
||||
A = &(d_r2[nO*nO*(i+nV*j)]); lda = nO;
|
||||
B = &(d_Y_oovv[nO*nO*(i+nV*j)]); ldb = nO;
|
||||
C = &(d_r2[nO*nO*(i+nV*j)]); ldc = nO;
|
||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_T, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||
A = &(d_r2[nO*nO*(i+nV*j)]); lda = nO;
|
||||
B = &(d_Y_oovv[nO*nO*(j+nV*i)]); ldb = nO;
|
||||
C = &(d_r2[nO*nO*(i+nV*j)]); ldc = nO;
|
||||
cublasDgeam(handle, CUBLAS_OP_N, CUBLAS_OP_N, nO, nO, &alpha, A, lda, &beta, B, ldb, C, ldc);
|
||||
}
|
||||
}
|
||||
cudaFree(d_Y_oovv);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
lda = cholesky_mo_num * nV;
|
||||
cudaMalloc((void **)&d_tmp_cc, lda * nV * sizeof(double));
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user