diff --git a/src/GW/RGW_excitation_density.f90 b/src/GW/RGW_excitation_density.f90 index 857adf0..197d8f6 100644 --- a/src/GW/RGW_excitation_density.f90 +++ b/src/GW/RGW_excitation_density.f90 @@ -1,4 +1,4 @@ -subroutine RGW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) +subroutine RGW_excitation_density(nOrb,nC,nO,nR,nS,ERI,XpY,rho) ! Compute excitation densities @@ -6,42 +6,75 @@ subroutine RGW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) ! Input variables - integer,intent(in) :: nBas + integer,intent(in) :: nOrb integer,intent(in) :: nC integer,intent(in) :: nO integer,intent(in) :: nR integer,intent(in) :: nS - double precision,intent(in) :: ERI(nBas,nBas,nBas,nBas) + double precision,intent(in) :: ERI(nOrb,nOrb,nOrb,nOrb) double precision,intent(in) :: XpY(nS,nS) ! Local variables integer :: ia,jb,p,q,j,b + double precision, allocatable :: tmp(:,:,:) ! Output variables - double precision,intent(out) :: rho(nBas,nBas,nS) + double precision,intent(out) :: rho(nOrb,nOrb,nS) - rho(:,:,:) = 0d0 - !$OMP PARALLEL & - !$OMP SHARED(nC,nBas,nR,nO,nS,rho,ERI,XpY) & - !$OMP PRIVATE(q,p,jb,ia) & - !$OMP DEFAULT(NONE) - !$OMP DO - do q=nC+1,nBas-nR - do p=nC+1,nBas-nR + if(nOrb .lt. 256) then + + allocate(tmp(nOrb,nOrb,nS)) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, j, b, jb) & + !$OMP SHARED(nOrb, nC, nO, nR, ERI, tmp) & + !$OMP DO COLLAPSE(2) + do p = 1, nOrb + do q = 1, nOrb jb = 0 - do j=nC+1,nO - do b=nO+1,nBas-nR - jb = jb + 1 - do ia=1,nS - rho(p,q,ia) = rho(p,q,ia) + ERI(p,j,q,b)*XpY(ia,jb) - end do - end do - end do - end do - end do - !$OMP END DO - !$OMP END PARALLEL + do j = nC+1, nO + do b = nO+1, nOrb-nR + jb = jb + 1 + tmp(p,q,jb) = ERI(p,j,q,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END COLLAPSE + + call dgemm("N", "T", nOrb*nOrb, nS, nS, 1.d0, & + tmp(1,1,1), nOrb*nOrb, XpY(1,1), nS, & + 0.d0, rho(1,1,1), nOrb*nOrb) + + deallocate(tmp) + + else + + rho(:,:,:) = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(nC,nOrb,nR,nO,nS,rho,ERI,XpY) & + !$OMP PRIVATE(q,p,jb,ia) & + !$OMP DEFAULT(NONE) + !$OMP DO + do q=nC+1,nOrb-nR + do p=nC+1,nOrb-nR + jb = 0 + do j=nC+1,nO + do b=nO+1,nOrb-nR + jb = jb + 1 + do ia=1,nS + rho(p,q,ia) = rho(p,q,ia) + ERI(p,j,q,b)*XpY(ia,jb) + end do + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + endif end subroutine diff --git a/src/GW/RGW_ppBSE.f90 b/src/GW/RGW_ppBSE.f90 index 346e9a2..96ba78a 100644 --- a/src/GW/RGW_ppBSE.f90 +++ b/src/GW/RGW_ppBSE.f90 @@ -124,24 +124,61 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS allocate(Bpp(nVV,nOO),Cpp(nVV,nVV),Dpp(nOO,nOO)) allocate(KB_sta(nVV,nOO),KC_sta(nVV,nVV),KD_sta(nOO,nOO)) + print*, 'RGW_ppBSE_static_kernel_C:' + call wall_time(tt1) call RGW_ppBSE_static_kernel_C(ispin,eta,nOrb,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_C (sec)', tt2-tt1 + + print*, 'RGW_ppBSE_static_kernel_D:' + call wall_time(tt1) call RGW_ppBSE_static_kernel_D(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) - if(.not.TDA) call RGW_ppBSE_static_kernel_B(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_D (sec)', tt2-tt1 + + if(.not.TDA) then + print*, 'RGW_ppBSE_static_kernel_B:' + call wall_time(tt1) + call RGW_ppBSE_static_kernel_B(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_B (sec)', tt2-tt1 + endif + print*, 'ppLR_C:' + call wall_time(tt1) call ppLR_C(ispin,nOrb,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for ppLR_C (sec)', tt2-tt1 + + print*, 'ppLR_D:' + call wall_time(tt1) call ppLR_D(ispin,nOrb,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) - if(.not.TDA) call ppLR_B(ispin,nOrb,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for ppLR_D (sec)', tt2-tt1 + + if(.not.TDA) then + print*, 'ppLR_B:' + call wall_time(tt1) + call ppLR_B(ispin,nOrb,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for ppLR_B (sec)', tt2-tt1 + endif Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) + print*, 'ppLR:' + call wall_time(tt1) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) + call wall_time(tt2) + write(*,'(A,1X,F10.3)'), 'wall time for ppLR (sec)', tt2-tt1 + deallocate(Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) - !print*, 'LAPACK:' - !print*, Om2 - !print*, Om1 + print*, 'LAPACK:' + print*, Om2 + print*, Om1 ! --- @@ -151,46 +188,46 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS ! Davidson ! --- - !n_states = nOO + 5 - !n_states_diag = n_states + 4 - !allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) + n_states = nOO + 5 + n_states_diag = n_states + 4 + allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) - !supp_data_int = 1 - !allocate(supp_data_int(supp_data_int_size)) - !supp_data_int(1) = nS + supp_data_int = 1 + allocate(supp_data_int(supp_data_int_size)) + supp_data_int(1) = nS - !supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 - !allocate(supp_data_dbl(supp_data_dbl_size)) - !! scalars - !supp_data_dbl(1) = eta - !i_data = 1 - !! rho_RPA - !do q = 1, nOrb - ! do p = 1, nOrb - ! do m = 1, nS - ! i_data = i_data + 1 - ! supp_data_dbl(i_data) = rho_RPA(p,q,m) - ! enddo - ! enddo - !enddo - !! OmRPA - !do m = 1, nS - ! i_data = i_data + 1 - ! supp_data_dbl(i_data) = OmRPA(m) - !enddo + supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 + allocate(supp_data_dbl(supp_data_dbl_size)) + ! scalars + supp_data_dbl(1) = eta + i_data = 1 + ! rho_RPA + do q = 1, nOrb + do p = 1, nOrb + do m = 1, nS + i_data = i_data + 1 + supp_data_dbl(i_data) = rho_RPA(p,q,m) + enddo + enddo + enddo + ! OmRPA + do m = 1, nS + i_data = i_data + 1 + supp_data_dbl(i_data) = OmRPA(m) + enddo - !call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & - ! 1.d0, & ! lambda - ! eGW(1), & - ! 0.d0, & ! eF - ! ERI(1,1,1,1), & - ! supp_data_int(1), supp_data_int_size, & - ! supp_data_dbl(1), supp_data_dbl_size, & - ! Om(1), R(1,1), n_states, n_states_diag, "GW") + call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & + 1.d0, & ! lambda + eGW(1), & + 0.d0, & ! eF + ERI(1,1,1,1), & + supp_data_int(1), supp_data_int_size, & + supp_data_dbl(1), supp_data_dbl_size, & + Om(1), R(1,1), n_states, n_states_diag, "GW") - !deallocate(Om, R) - !deallocate(supp_data_dbl, supp_data_int) - !stop + deallocate(Om, R) + deallocate(supp_data_dbl, supp_data_int) + stop ! --- diff --git a/src/LR/ppLR_GW_davidson.f90 b/src/LR/ppLR_GW_davidson.f90 index 35051f1..a8c1af2 100644 --- a/src/LR/ppLR_GW_davidson.f90 +++ b/src/LR/ppLR_GW_davidson.f90 @@ -33,8 +33,7 @@ subroutine ppLR_GW_HR_calc(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e, eF, call wall_time(t1) - !if((nOO+nVV) .le. 20000) then - if((nOO+nVV) .le. 2) then + if((nOO+nVV) .le. 20000) then call ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e(1), eF, n_states_diag, & ERI(1,1,1,1), eta, rho(1,1,1), Om(1), U(1,1), W(1,1))