4
1
mirror of https://github.com/pfloos/quack synced 2024-11-06 22:24:03 +01:00

Merge pull request #7 from AbdAmmar/master

optimizations in pp-RPA & pp-BSE
This commit is contained in:
AbdAmmar 2024-08-26 15:28:40 +02:00 committed by GitHub
commit 1de213dc89
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 688 additions and 216 deletions

View File

@ -2,6 +2,12 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
! Compute excitation densities for T-matrix self-energy
! TODO
! debug DGEMM for nC != 0
! and nR != 0
implicit none
! Input variables
@ -33,6 +39,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
double precision,intent(out) :: rho1(nBas,nBas,nVV)
double precision,intent(out) :: rho2(nBas,nBas,nOO)
integer :: dim_1, dim_2
double precision, allocatable :: ERI_1(:,:,:)
double precision, allocatable :: ERI_2(:,:,:)
! Initialization
rho1(:,:,:) = 0d0
@ -44,11 +54,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
if(ispin == 1) then
!$OMP PARALLEL &
!$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) &
!$OMP PRIVATE(q,p,ab,cd,kl,ij) &
!$OMP DEFAULT(NONE)
!$OMP DO
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) &
!$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2)
!$OMP DO COLLAPSE(2)
do q=nC+1,nBas-nR
do p=nC+1,nBas-nR
@ -123,65 +132,132 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
if(ispin == 2 .or. ispin == 4) then
do q=nC+1,nBas-nR
do p=nC+1,nBas-nR
dim_1 = (nBas - nO) * (nBas - nO - 1) / 2
dim_2 = nO * (nO - 1) / 2
! do ab=1,nVV
ab = 0
do a=nO+1,nBas-nR
do b=a+1,nBas-nR
ab = ab + 1
allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2))
ERI_1 = 0.d0
ERI_2 = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p, q, c, d, cd, k, l, kl) &
!$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI)
!$OMP DO COLLAPSE(2)
do q = nC+1, nBas-nR
do p = nC+1, nBas-nR
cd = 0
do c=nO+1,nBas-nR
do d=c+1,nBas-nR
do c = nO+1, nBas-nR
do d = c+1, nBas-nR
cd = cd + 1
rho1(p,q,ab) = rho1(p,q,ab) &
+ (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab)
end do
end do
ERI_1(p,q,cd) = ERI(p,q,c,d) - ERI(p,q,d,c)
enddo
enddo
kl = 0
do k=nC+1,nO
do l=k+1,nO
do k = nC+1, nO
do l = k+1, nO
kl = kl + 1
rho1(p,q,ab) = rho1(p,q,ab) &
+ (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab)
ERI_2(p,q,kl) = ERI(p,q,k,l) - ERI(p,q,l,k)
end do
end do
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
end do
end do
call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, &
0.d0, rho1(1,1,1), nBas*nBas)
! do ij=1,nOO
ij = 0
do i=nC+1,nO
do j=i+1,nO
ij = ij + 1
call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, &
1.d0, rho1(1,1,1), nBas*nBas)
cd = 0
do c=nO+1,nBas-nR
do d=c+1,nBas-nR
cd = cd + 1
rho2(p,q,ij) = rho2(p,q,ij) &
+ (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij)
end do
end do
call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, &
0.d0, rho2(1,1,1), nBas*nBas)
kl = 0
do k=nC+1,nO
do l=k+1,nO
kl = kl + 1
rho2(p,q,ij) = rho2(p,q,ij) &
+ (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij)
end do
end do
call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, &
1.d0, rho2(1,1,1), nBas*nBas)
end do
end do
deallocate(ERI_1, ERI_2)
end do
end do
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) &
! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2)
! !$OMP DO COLLAPSE(2)
! do q = nC+1, nBas-nR
! do p = nC+1, nBas-nR
!
! ab = 0
!
! do a = nO+1, nBas-nR
! do b = a+1, nBas-nR
!
! ab = ab + 1
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = c+1, nBas-nR
!
! cd = cd + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) &
! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab)
! end do ! d
! end do ! c
!
! kl = 0
! do k = nC+1, nO
! do l = k+1, nO
!
! kl = kl + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) &
! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab)
! end do ! l
! end do ! k
!
! end do ! b
! end do ! a
!
! ij = 0
! do i = nC+1, nO
! do j = i+1, nO
!
! ij = ij + 1
!
! cd = 0
!
! do c = nO+1, nBas-nR
! do d = c+1, nBas-nR
!
! cd = cd + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) &
! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij)
! end do ! d
! end do ! c
!
! kl = 0
! do k = nC+1, nO
! do l = k+1, nO
!
! kl = kl + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) &
! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij)
! end do ! l
! end do ! k
!
! end do ! j
! end do ! i
!
! end do ! p
! end do ! q
! !$OMP END DO
! !$OMP END PARALLEL
end if
@ -191,70 +267,128 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
if(ispin == 3) then
!$OMP PARALLEL &
!$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) &
!$OMP PRIVATE(q,p,ab,cd,kl,ij,c,d,k,l) &
!$OMP DEFAULT(NONE)
!$OMP DO
dim_1 = (nBas - nO) * (nBas - nO)
dim_2 = nO * nO
do q=nC+1,nBas-nR
do p=nC+1,nBas-nR
! do ab=1,nVV
ab = 0
do a=nO+1,nBas-nR
do b=nO+1,nBas-nR
ab = ab + 1
allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2))
ERI_1 = 0.d0
ERI_2 = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p, q, c, d, cd, k, l, kl) &
!$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI)
!$OMP DO COLLAPSE(2)
do q = nC+1, nBas-nR
do p = nC+1, nBas-nR
cd = 0
do c=nO+1,nBas-nR
do d=nO+1,nBas-nR
do c = nO+1, nBas-nR
do d = nO+1, nBas-nR
cd = cd + 1
rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab)
end do
end do
ERI_1(p,q,cd) = ERI(p,q,c,d)
enddo
enddo
kl = 0
do k=nC+1,nO
do l=nC+1,nO
do k = nC+1, nO
do l = nC+1, nO
kl = kl + 1
rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab)
end do
end do
end do
end do
! do ij=1,nOO
ij = 0
do i=nC+1,nO
do j=nC+1,nO
ij = ij + 1
cd = 0
do c=nO+1,nBas-nR
do d=nO+1,nBas-nR
cd = cd + 1
rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij)
end do
end do
kl = 0
do k=nC+1,nO
do l=nC+1,nO
kl = kl + 1
rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij)
end do
end do
end do
end do
ERI_2(p,q,kl) = ERI(p,q,k,l)
end do
end do
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, &
0.d0, rho1(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, &
1.d0, rho1(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, &
0.d0, rho2(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, &
1.d0, rho2(1,1,1), nBas*nBas)
deallocate(ERI_1, ERI_2)
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) &
! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2)
! !$OMP DO COLLAPSE(2)
!
! do q = nC+1, nBas-nR
! do p = nC+1, nBas-nR
!
! ab = 0
! do a = nO+1, nBas-nR
! do b = nO+1, nBas-nR
!
! ab = ab + 1
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = nO+1, nBas-nR
!
! cd = cd + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab)
! end do
! end do
!
! kl = 0
! do k = nC+1, nO
! do l = nC+1, nO
!
! kl = kl + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab)
! end do
! end do
!
! end do
! end do
!
! ij = 0
! do i = nC+1, nO
! do j = nC+1, nO
!
! ij = ij + 1
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = nO+1, nBas-nR
!
! cd = cd + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij)
! end do
! end do
!
! kl = 0
! do k = nC+1, nO
! do l = nC+1, nO
!
! kl = kl + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij)
! end do
! end do
!
! end do
! end do
!
! end do
! end do
! !$OMP END DO
! !$OMP END PARALLEL
end if
end subroutine

View File

@ -64,6 +64,7 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
double precision,allocatable :: eGT(:)
double precision,allocatable :: eGTlin(:)
! Output variables
! Hello world
@ -122,9 +123,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
allocate(Bpp(nVVs,nOOs),Cpp(nVVs,nVVs),Dpp(nOOs,nOOs))
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp)
call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eHF,ERI,Cpp)
call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eHF,ERI,Dpp)
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp)
call ppLR(TDA_T,nOOs,nVVs,Bpp,Cpp,Dpp,Om1s,X1s,Y1s,Om2s,X2s,Y2s,EcRPA(ispin))
@ -145,9 +146,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt))
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp)
call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eHF,ERI,Cpp)
call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eHF,ERI,Dpp)
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp)
call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin))
@ -162,10 +163,12 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
! iblock = 1
iblock = 3
call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,ERI,X1s,Y1s,rho1s,X2s,Y2s,rho2s)
! iblock = 2
iblock = 4
call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,ERI,X1t,Y1t,rho1t,X2t,Y2t,rho2t)
!----------------------------------------------
@ -218,9 +221,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
allocate(Bpp(nVVs,nOOs),Cpp(nVVs,nVVs),Dpp(nOOs,nOOs))
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp)
call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eGT,ERI,Cpp)
call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eGT,ERI,Dpp)
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp)
call ppLR(TDA_T,nOOs,nVVs,Bpp,Cpp,Dpp,Om1s,X1s,Y1s,Om2s,X2s,Y2s,EcRPA(ispin))
@ -232,9 +235,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d
allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt))
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp)
call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eGT,ERI,Cpp)
call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eGT,ERI,Dpp)
if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp)
call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin))

View File

@ -66,6 +66,7 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,
double precision,intent(out) :: EcBSE(nspin)
!---------------------------------
! Compute (singlet) RPA screening
!---------------------------------
@ -77,9 +78,11 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,
Aph(nS,nS),Bph(nS,nS))
call phLR_A(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,eW,ERI,Aph)
if(.not.TDA_W) call phLR_B(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph)
call phLR(TDA_W,nS,Aph,Bph,EcRPA,OmRPA,XpY_RPA,XmY_RPA)
call GW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY_RPA,rho_RPA)
deallocate(XpY_RPA,XmY_RPA,Aph,Bph)
@ -108,13 +111,13 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,
! Compute BSE excitation energies
if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta)
call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta)
call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta)
if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta)
if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp)
call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp)
call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp)
if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp)
Bpp(:,:) = Bpp(:,:) + KB_sta(:,:)
Cpp(:,:) = Cpp(:,:) + KC_sta(:,:)
@ -131,9 +134,9 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,
if(dBSE) &
call GW_ppBSE_dynamic_perturbation(ispin,dTDA,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,eW,eGW,ERI,dipole_int,OmRPA,rho_RPA, &
Om1,X1,Y1,Om2,X2,Y2,KB_sta,KC_sta,KD_sta)
write(*,*) "Deallocate not done"
deallocate(Om1,X1,Y1,Om2,X2,Y2,Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta)
write(*,*) "Deallocate done"
end if
!-------------------
@ -160,14 +163,13 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,
! Compute BSE excitation energies
if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta)
call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta)
call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta)
if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta)
if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp)
call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp)
call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp)
if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp)
Bpp(:,:) = Bpp(:,:) + KB_sta(:,:)
Cpp(:,:) = Cpp(:,:) + KC_sta(:,:)

View File

@ -26,44 +26,186 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI
double precision,external :: Kronecker_delta
double precision :: chi
double precision :: eps
double precision :: tmp_ab, lambda4, eta2
integer :: a,b,c,d,ab,cd,m
integer :: a0, aa
double precision, allocatable :: Om_tmp(:)
double precision, allocatable :: tmp_m(:,:,:)
double precision, allocatable :: tmp(:,:,:,:)
! Output variables
double precision,intent(out) :: KC(nVV,nVV)
! Initialization
KC(:,:) = 0d0
!---------------!
! Singlet block !
!---------------!
if(ispin == 1) then
ab = 0
do a=nO+1,nBas-nR
do b=a,nBas-nR
ab = ab + 1
a0 = nBas - nR - nO
lambda4 = 4.d0 * lambda
eta2 = eta * eta
allocate(tmp_m(nBas,nBas,nS))
allocate(tmp(nBas,nBas,nBas,nBas))
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(m, c, a, eps) &
!$OMP SHARED(nS, nBas, eta2, Om, rho, tmp_m)
!$OMP DO
do m = 1, nS
eps = Om(m) / (Om(m)*Om(m) + eta2)
do c = 1, nBas
do a = 1, nBas
tmp_m(a,c,m) = eps * rho(a,c,m)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, &
tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, &
0.d0, tmp(1,1,1,1), nBas*nBas)
deallocate(tmp_m)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(a, b, aa, ab, c, d, cd, tmp_ab) &
!$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC)
!$OMP DO
do a = nO+1, nBas-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO
do b = a, nBas-nR
ab = aa + b
tmp_ab = lambda4
if(a .eq. b) then
tmp_ab = 0.7071067811865475d0 * lambda4
endif
cd = 0
do c=nO+1,nBas-nR
do d=c,nBas-nR
do c = nO+1, nBas-nR
do d = c, nBas-nR
cd = cd + 1
chi = 0d0
do m=1,nS
eps = Om(m)**2 + eta**2
chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps &
- rho(a,d,m)*rho(b,c,m)*Om(m)/eps
end do
KC(ab,cd) = -tmp_ab * (tmp(a,c,b,d) + tmp(a,d,b,c))
if(c .eq. d) then
KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd)
endif
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
KC(ab,cd) = 4d0*lambda*chi/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d)))
deallocate(tmp)
end do
end do
end do
end do
! do a=nO+1,nBas-nR
! do b=a,nBas-nR
! ab = ab + 1
! cd = 0
! do c=nO+1,nBas-nR
! do d=c,nBas-nR
! cd = cd + 1
!
! chi = 0d0
! do m=1,nS
! eps = Om(m)**2 + eta**2
! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps &
! - rho(a,d,m)*rho(b,c,m)*Om(m)/eps
! end do
! --- --- ---
! OpenMP implementation
! --- --- ---
!
! a0 = nBas - nR - nO
! lambda4 = 4.d0 * lambda
! eta2 = eta * eta
!
! allocate(Om_tmp(nS))
!
! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp)
! !$OMP DO
! do m = 1, nS
! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2)
! enddo
! !$OMP END DO
! !$OMP END PARALLEL
!
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) &
! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC)
! !$OMP DO
! do a = nO+1, nBas-nR
! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO
! do b = a, nBas-nR
! ab = aa + b
!
! tmp_ab = lambda4
! if(a .eq. b) then
! tmp_ab = 0.7071067811865475d0 * lambda4
! endif
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = c, nBas-nR
! cd = cd + 1
!
! KC(ab,cd) = 0d0
! do m = 1, nS
! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) &
! - rho(a,d,m) * rho(b,c,m) * Om_tmp(m)
! end do
!
! KC(ab,cd) = tmp_ab * KC(ab,cd)
! if(c .eq. d) then
! KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd)
! endif
! enddo
! enddo
! enddo
! enddo
! !$OMP END DO
! !$OMP END PARALLEL
!
! deallocate(Om_tmp)
! --- --- ---
! --- --- ---
! Naive implementation
! --- --- ---
!
! ab = 0
! do a=nO+1,nBas-nR
! do b=a,nBas-nR
! ab = ab + 1
! cd = 0
! do c=nO+1,nBas-nR
! do d=c,nBas-nR
! cd = cd + 1
!
! chi = 0d0
! do m=1,nS
! eps = Om(m)**2 + eta**2
! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps &
! - rho(a,d,m)*rho(b,c,m)*Om(m)/eps
! end do
!
! KC(ab,cd) = 4d0*lambda*chi/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d)))
!
! end do
! end do
! end do
! end do
! --- --- ---
end if
@ -73,28 +215,136 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI
if(ispin == 2) then
ab = 0
do a=nO+1,nBas-nR
do b=a+1,nBas-nR
ab = ab + 1
a0 = nBas - nR - nO - 1
lambda4 = 4.d0 * lambda
eta2 = eta * eta
allocate(tmp_m(nBas,nBas,nS))
allocate(tmp(nBas,nBas,nBas,nBas))
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(m, c, a, eps) &
!$OMP SHARED(nS, nBas, eta2, Om, rho, tmp_m)
!$OMP DO
do m = 1, nS
eps = Om(m) / (Om(m)*Om(m) + eta2)
do c = 1, nBas
do a = 1, nBas
tmp_m(a,c,m) = eps * rho(a,c,m)
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, &
tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, &
0.d0, tmp(1,1,1,1), nBas*nBas)
deallocate(tmp_m)
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(a, b, aa, ab, c, d, cd) &
!$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC)
!$OMP DO
do a = nO+1, nBas-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1
do b = a+1, nBas-nR
ab = aa + b
cd = 0
do c=nO+1,nBas-nR
do d=c+1,nBas-nR
do c = nO+1, nBas-nR
do d = c+1, nBas-nR
cd = cd + 1
chi = 0d0
do m=1,nS
eps = Om(m)**2 + eta**2
chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps &
+ rho(a,d,m)*rho(b,c,m)*Om(m)/eps
end do
KC(ab,cd) = lambda4 * (-tmp(a,c,b,d) + tmp(a,d,b,c))
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
KC(ab,cd) = 4d0*lambda*chi
deallocate(tmp)
end do
end do
end do
end do
! --- --- ---
! OpenMP implementation
! --- --- ---
!
! a0 = nBas - nR - nO - 1
! lambda4 = 4.d0 * lambda
! eta2 = eta * eta
!
! allocate(Om_tmp(nS))
!
! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp)
! !$OMP DO
! do m = 1, nS
! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2)
! enddo
! !$OMP END DO
! !$OMP END PARALLEL
!
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) &
! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC)
! !$OMP DO
! do a = nO+1, nBas-nR
! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1
! do b = a+1, nBas-nR
! ab = aa + b
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = c+1, nBas-nR
! cd = cd + 1
!
! KC(ab,cd) = 0d0
! do m = 1, nS
! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) &
! + rho(a,d,m) * rho(b,c,m) * Om_tmp(m)
! end do
!
! KC(ab,cd) = lambda4 * KC(ab,cd)
! enddo
! enddo
! enddo
! enddo
! !$OMP END DO
! !$OMP END PARALLEL
!
! deallocate(Om_tmp)
! --- --- ---
! --- --- ---
! Naive implementation
! --- --- ---
!
! ab = 0
! do a=nO+1,nBas-nR
! do b=a+1,nBas-nR
! ab = ab + 1
! cd = 0
! do c=nO+1,nBas-nR
! do d=c+1,nBas-nR
! cd = cd + 1
!
! chi = 0d0
! do m=1,nS
! eps = Om(m)**2 + eta**2
! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps &
! + rho(a,d,m)*rho(b,c,m)*Om(m)/eps
! end do
!
! KC(ab,cd) = 4d0*lambda*chi
!
! end do
! end do
! end do
! end do
! --- --- ---
end if

View File

@ -59,6 +59,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA
double precision,allocatable :: eGWlin(:)
double precision,allocatable :: eGW(:)
! Output variables
! Hello world
@ -102,6 +103,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA
!-------------------!
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eHF,ERI,Aph)
if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph)
call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY)
@ -159,6 +161,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA
! Compute the RPA correlation energy
call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eGW,ERI,Aph)
if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph)
call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY)

View File

@ -23,6 +23,8 @@ subroutine ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,lambda,e,ERI,Cpp)
double precision,external :: Kronecker_delta
integer :: a,b,c,d,ab,cd
integer :: a0, aa
double precision :: e_ab, tmp_ab, delta_ac, tmp_cd
! Output variables
@ -37,22 +39,70 @@ subroutine ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,lambda,e,ERI,Cpp)
if(ispin == 1) then
ab = 0
do a=nO+1,nBas-nR
do b=a,nBas-nR
ab = ab + 1
a0 = nBas - nR - nO
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(a, b, aa, ab, c, d, cd, e_ab, tmp_ab, delta_ac, tmp_cd) &
!$OMP SHARED(nO, nBas, nR, a0, eF, lambda, e, ERI, Cpp)
!$OMP DO
do a = nO+1, nBas-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO
do b = a, nBas-nR
ab = aa + b
e_ab = e(a) + e(b) - eF
tmp_ab = lambda
if(a .eq. b) then
tmp_ab = 0.7071067811865475d0 * lambda
endif
cd = 0
do c=nO+1,nBas-nR
do d=c,nBas-nR
do c = nO+1, nBas-nR
delta_ac = 0.d0
if(a .eq. c) then
delta_ac = 1.d0
endif
do d = c, nBas-nR
cd = cd + 1
Cpp(ab,cd) = + (e(a) + e(b) - eF)*Kronecker_delta(a,c)*Kronecker_delta(b,d) &
+ lambda*(ERI(a,b,c,d) + ERI(a,b,d,c))/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d)))
tmp_cd = tmp_ab
if(c .eq. d) then
tmp_cd = 0.7071067811865475d0 * tmp_ab
endif
end do
end do
end do
end do
Cpp(ab,cd) = 0.d0
if(b .eq. d) then
Cpp(ab,cd) = e_ab * delta_ac
endif
Cpp(ab,cd) = Cpp(ab,cd) + tmp_cd * (ERI(a,b,c,d) + ERI(a,b,d,c))
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
! ab = 0
! do a=nO+1,nBas-nR
! do b=a,nBas-nR
! ab = ab + 1
! cd = 0
! do c=nO+1,nBas-nR
! do d=c,nBas-nR
! cd = cd + 1
!
! Cpp(ab,cd) = + (e(a) + e(b) - eF)*Kronecker_delta(a,c)*Kronecker_delta(b,d) &
! + lambda*(ERI(a,b,c,d) + ERI(a,b,d,c))/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d)))
!
! end do
! end do
! end do
! end do
end if

View File

@ -39,6 +39,7 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2)
double precision,intent(out) :: X2(nVV,nOO)
double precision,intent(out) :: Y2(nOO,nOO)
! Memory allocation
allocate(M(nOO+nVV,nOO+nVV),Z1(nOO+nVV,nVV),Z2(nOO+nVV,nOO),order1(nVV),order2(nOO))
@ -113,6 +114,7 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2)
end if
! Orthogonalize eigenvectors
! deg1 = 1
@ -202,7 +204,6 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2)
if(nVV > 0) call dgemm ('N', 'N', nOO+nVV, nVV, nOO+nVV, 1d0, M, nOO+nVV, Z1, nOO+nVV, 0d0, tmp1, nOO+nVV)
if(nVV > 0) call dgemm ('T', 'N', nVV , nVV, nOO+nVV, 1d0, Z1, nOO+nVV, tmp1, nOO+nVV, 0d0, S1, nVV)
if(nOO > 0) call dgemm ('N', 'N', nOO+nVV, nOO, nOO+nVV, 1d0, M, nOO+nVV, -1d0*Z2, nOO+nVV, 0d0, tmp2, nOO+nVV)
if(nOO > 0) call dgemm ('T', 'N', nOO , nOO, nOO+nVV, 1d0, Z2, nOO+nVV, tmp2, nOO+nVV, 0d0, S2, nOO)

View File

@ -76,10 +76,21 @@ STDCXX=-lstdc++
FIX_ORDER_OF_LIBS=-Wl,--start-group
"""
compile_olympe = """
FC = ifort -mkl=parallel -qopenmp
AR = ar crs
FFLAGS = -I$IDIR -Ofast -traceback -xCORE-AVX512
CC = icc
CXX = icpc
LAPACK=
STDCXX=-lstdc++
FIX_ORDER_OF_LIBS=-Wl,--start-group
"""
if sys.platform in ["linux", "linux2"]:
compiler = compile_gfortran_linux
# compiler = compile_gfortran_linux
# compiler = compile_ifort_linux
compiler = compile_olympe
elif sys.platform == "darwin":
compiler = compile_gfortran_mac
else:

View File

@ -54,7 +54,7 @@ subroutine orthogonalization_matrix(nBas,S,X)
end do
call ADAt(nBas,Uvec,Uval,X)
call ADAt(nBas, Uvec(1,1), Uval(1), X(1,1))
elseif(ortho_type == 2) then

View File

@ -375,15 +375,29 @@ subroutine ADAt(N,A,D,B)
double precision,intent(out) :: B(N,N)
B = 0d0
double precision, allocatable :: tmp(:,:)
do i=1,N
do j=1,N
do k=1,N
B(i,k) = B(i,k) + A(i,j)*D(j)*A(k,j)
end do
end do
end do
allocate(tmp(N,N))
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(i, j) SHARED(N, A, D, tmp)
!$OMP DO
do i = 1, N
do j = 1, N
tmp(i,j) = D(i) * A(j,i)
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", N, N, N, 1.d0, A(1,1), N, tmp(1,1), N, 0.d0, B(1,1), N)
deallocate(tmp)
! B = 0d0
! do i=1,N
! do j=1,N
! do k=1,N
! B(i,k) = B(i,k) + A(i,j)*D(j)*A(k,j)
! end do
! end do
! end do
end subroutine
!------------------------------------------------------------------------

View File

@ -31,6 +31,8 @@ subroutine diagonalize_general_matrix(N,A,WR,VR)
call dgeev('V','V',N,A,N,WR,WI,VL,N,VR,N,work,lwork,info)
deallocate(work, WI, VL)
if(info /= 0) then
print*,'Problem in diagonalize_general_matrix (dgeev)!!'
end if
@ -68,6 +70,8 @@ subroutine diagonalize_matrix(N,A,e)
call dsyev('V','U',N,A,N,e,work,lwork,info)
deallocate(work)
if(info /= 0) then
print*,'Problem in diagonalize_matrix (dsyev)!!'
end if