1
0
mirror of https://gitlab.com/scemama/qp_plugins_scemama.git synced 2024-08-29 23:43:43 +02:00
qp_plugins_scemama/devel/ccsd_gpu/ccsd_space_orb_sub_chol.irp.f

293 lines
6.6 KiB
FortranFixed
Raw Normal View History

2023-07-16 09:54:58 +02:00
subroutine ccsd_energy_space_chol(nO,nV,tau,t1,energy)
implicit none
integer, intent(in) :: nO, nV
double precision, intent(in) :: tau(nO,nO,nV,nV)
double precision, intent(in) :: t1(nO,nV)
double precision, intent(out) :: energy
! internal
integer :: i,j,a,b
double precision :: e
energy = 0d0
!$omp parallel &
!$omp shared(nO,nV,energy,tau,t1,&
!$omp cc_space_f_vo,cc_space_w_oovv) &
!$omp private(i,j,a,b,e) &
!$omp default(none)
e = 0d0
!$omp do
do a = 1, nV
do i = 1, nO
e = e + 2d0 * cc_space_f_vo(a,i) * t1(i,a)
enddo
enddo
!$omp end do nowait
!$omp do
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
e = e + tau(i,j,a,b) * cc_space_w_oovv(i,j,a,b)
enddo
enddo
enddo
enddo
!$omp end do nowait
!$omp critical
energy = energy + e
!$omp end critical
!$omp end parallel
end
! Tau
subroutine update_tau_space_chol(nO,nV,t1,t2,tau)
implicit none
! in
integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV)
! out
double precision, intent(out) :: tau(nO,nO,nV,nV)
! internal
integer :: i,j,a,b
!$OMP PARALLEL &
!$OMP SHARED(nO,nV,tau,t2,t1) &
!$OMP PRIVATE(i,j,a,b) &
!$OMP DEFAULT(NONE)
!$OMP DO
do b = 1, nV
do a = 1, nV
do j = 1, nO
do i = 1, nO
tau(i,j,a,b) = t2(i,j,a,b) + t1(i,a) * t1(j,b)
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
end
! R1
subroutine compute_r1_space_chol(nO,nV,t1,t2,tau,H_oo,H_vv,H_vo,r1,max_r1)
implicit none
! in
integer, intent(in) :: nO, nV
double precision, intent(in) :: t1(nO,nV), t2(nO,nO,nV,nV), tau(nO,nO,nV,nV)
double precision, intent(in) :: H_oo(nO,nO), H_vv(nV,nV), H_vo(nV,nO)
! out
double precision, intent(out) :: r1(nO,nV), max_r1
! internal
integer :: u,i,j,beta,a,b
!$omp parallel &
!$omp shared(nO,nV,r1,cc_space_f_ov) &
!$omp private(u,beta) &
!$omp default(none)
!$omp do
do beta = 1, nV
do u = 1, nO
r1(u,beta) = cc_space_f_ov(u,beta)
enddo
enddo
!$omp end do
!$omp end parallel
double precision, allocatable :: X_oo(:,:)
allocate(X_oo(nO,nO))
call dgemm('N','N', nO, nO, nV, &
-2d0, t1 , size(t1,1), &
cc_space_f_vo, size(cc_space_f_vo,1), &
0d0, X_oo , size(X_oo,1))
call dgemm('T','N', nO, nV, nO, &
1d0, X_oo, size(X_oo,2), &
t1 , size(t1,1), &
1d0, r1 , size(r1,1))
deallocate(X_oo)
call dgemm('N','N', nO, nV, nV, &
1d0, t1 , size(t1,1), &
H_vv, size(H_vv,1), &
1d0, r1 , size(r1,1))
call dgemm('N','N', nO, nV, nO, &
-1d0, H_oo, size(H_oo,1), &
t1 , size(t1,1), &
1d0, r1, size(r1,1))
double precision, allocatable :: X_voov(:,:,:,:)
allocate(X_voov(nV, nO, nO, nV))
!$omp parallel &
!$omp shared(nO,nV,X_voov,t2,t1) &
!$omp private(u,beta,i,a) &
!$omp default(none)
!$omp do
do beta = 1, nV
do u = 1, nO
do i = 1, nO
do a = 1, nV
X_voov(a,i,u,beta) = 2d0 * t2(i,u,a,beta) - t2(u,i,a,beta) + t1(u,a) * t1(i,beta)
enddo
enddo
enddo
enddo
!$omp end do
!$omp end parallel
call dgemv('T', nV*nO, nO*nV, &
1d0, X_voov, size(X_voov,1) * size(X_voov,2), &
H_vo , 1, &
1d0, r1 , 1)
deallocate(X_voov)
double precision, allocatable :: X_ovov(:,:,:,:)
allocate(X_ovov(nO, nV, nO, nV))
!$omp parallel &
!$omp shared(nO,nV,cc_space_v_ovov,cc_space_v_voov,X_ovov) &
!$omp private(u,beta,i,a) &
!$omp default(none)
!$omp do
do beta = 1, nV
do u = 1, nO
do a = 1, nv
do i = 1, nO
X_ovov(i,a,u,beta) = 2d0 * cc_space_v_voov(a,u,i,beta) - cc_space_v_ovov(u,a,i,beta)
enddo
enddo
enddo
enddo
!$omp end do
!$omp end parallel
call dgemv('T', nO*nV, nO*nV, &
1d0, X_ovov, size(X_ovov,1) * size(X_ovov,2), &
t1 , 1, &
1d0, r1 , 1)
deallocate(X_ovov)
integer :: iblock, block_size, nVmax
double precision, allocatable :: W_vvov(:,:,:,:), W_vvov_tmp(:,:,:,:), T_vvoo(:,:,:,:)
block_size = 16
allocate(W_vvov(nV,nV,nO,block_size), W_vvov_tmp(nV,nO,nV,block_size), T_vvoo(nV,nV,nO,nO))
!$omp parallel &
!$omp private(u,i,b,a) &
!$omp default(shared)
!$omp do
do u = 1, nO
do i = 1, nO
do b = 1, nV
do a = 1, nV
T_vvoo(a,b,i,u) = tau(i,u,a,b)
enddo
enddo
enddo
enddo
!$omp end do
!$omp end parallel
do iblock = 1, nV, block_size
nVmax = min(block_size,nV-iblock+1)
call dgemm('T','N', nV*nO, nV*nVmax, cholesky_mo_num, 1.d0, &
cc_space_v_vo_chol , cholesky_mo_num, &
cc_space_v_vv_chol(1,1,iblock), cholesky_mo_num, &
0.d0, W_vvov_tmp, nV*nO)
!$omp parallel &
!$omp private(b,i,a,beta) &
!$omp default(shared)
do beta = 1, nVmax
do i = 1, nO
!$omp do
do b = 1, nV
do a = 1, nV
W_vvov(a,b,i,beta) = 2d0 * W_vvov_tmp(a,i,b,beta) - W_vvov_tmp(b,i,a,beta)
enddo
enddo
!$omp end do nowait
enddo
enddo
!$omp barrier
!$omp end parallel
call dgemm('T','N',nO,nVmax,nO*nV*nV, &
1d0, T_vvoo, nV*nV*nO, &
W_vvov, nO*nV*nV, &
1d0, r1(1,iblock), nO)
enddo
deallocate(W_vvov,T_vvoo)
double precision, allocatable :: W_oovo(:,:,:,:)
allocate(W_oovo(nO,nO,nV,nO))
!$omp parallel &
!$omp shared(nO,nV,cc_space_v_vooo,W_oovo) &
!$omp private(u,a,i,j) &
!$omp default(none)
do u = 1, nO
!$omp do
do a = 1, nV
do j = 1, nO
do i = 1, nO
W_oovo(i,j,a,u) = 2d0 * cc_space_v_vooo(a,u,i,j) - cc_space_v_vooo(a,u,j,i)
enddo
enddo
enddo
!$omp end do nowait
enddo
!$omp end parallel
call dgemm('T','N', nO, nV, nO*nO*nV, &
-1d0, W_oovo, size(W_oovo,1) * size(W_oovo,2) * size(W_oovo,3), &
tau , size(tau,1) * size(tau,2) * size(tau,3), &
1d0, r1 , size(r1,1))
deallocate(W_oovo)
max_r1 = 0d0
do a = 1, nV
do i = 1, nO
max_r1 = max(dabs(r1(i,a)), max_r1)
enddo
enddo
! Change the sign for consistency with the code in spin orbitals
!$omp parallel &
!$omp shared(nO,nV,r1) &
!$omp private(a,i) &
!$omp default(none)
!$omp do
do a = 1, nV
do i = 1, nO
r1(i,a) = -r1(i,a)
enddo
enddo
!$omp end do
!$omp end parallel
end