diff --git a/src/GW/RGW_excitation_density.f90 b/src/GW/RGW_excitation_density.f90 index 857adf0..19d15b9 100644 --- a/src/GW/RGW_excitation_density.f90 +++ b/src/GW/RGW_excitation_density.f90 @@ -1,4 +1,4 @@ -subroutine RGW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) +subroutine RGW_excitation_density(nOrb,nC,nO,nR,nS,ERI,XpY,rho) ! Compute excitation densities @@ -6,42 +6,75 @@ subroutine RGW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) ! Input variables - integer,intent(in) :: nBas + integer,intent(in) :: nOrb integer,intent(in) :: nC integer,intent(in) :: nO integer,intent(in) :: nR integer,intent(in) :: nS - double precision,intent(in) :: ERI(nBas,nBas,nBas,nBas) + double precision,intent(in) :: ERI(nOrb,nOrb,nOrb,nOrb) double precision,intent(in) :: XpY(nS,nS) ! Local variables integer :: ia,jb,p,q,j,b + double precision, allocatable :: tmp(:,:,:) ! Output variables - double precision,intent(out) :: rho(nBas,nBas,nS) + double precision,intent(out) :: rho(nOrb,nOrb,nS) - rho(:,:,:) = 0d0 - !$OMP PARALLEL & - !$OMP SHARED(nC,nBas,nR,nO,nS,rho,ERI,XpY) & - !$OMP PRIVATE(q,p,jb,ia) & - !$OMP DEFAULT(NONE) - !$OMP DO - do q=nC+1,nBas-nR - do p=nC+1,nBas-nR + if(nOrb .lt. 256) then + + allocate(tmp(nOrb,nOrb,nS)) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, j, b, jb) & + !$OMP SHARED(nOrb, nC, nO, nR, ERI, tmp) + !$OMP DO COLLAPSE(2) + do p = 1, nOrb + do q = 1, nOrb jb = 0 - do j=nC+1,nO - do b=nO+1,nBas-nR - jb = jb + 1 - do ia=1,nS - rho(p,q,ia) = rho(p,q,ia) + ERI(p,j,q,b)*XpY(ia,jb) - end do - end do - end do - end do - end do - !$OMP END DO - !$OMP END PARALLEL + do j = nC+1, nO + do b = nO+1, nOrb-nR + jb = jb + 1 + tmp(p,q,jb) = ERI(p,j,q,b) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm("N", "T", nOrb*nOrb, nS, nS, 1.d0, & + tmp(1,1,1), nOrb*nOrb, XpY(1,1), nS, & + 0.d0, rho(1,1,1), nOrb*nOrb) + + deallocate(tmp) + + else + + rho(:,:,:) = 0d0 + !$OMP PARALLEL & + !$OMP SHARED(nC,nOrb,nR,nO,nS,rho,ERI,XpY) & + !$OMP PRIVATE(q,p,jb,ia) & + !$OMP DEFAULT(NONE) + !$OMP DO + do q=nC+1,nOrb-nR + do p=nC+1,nOrb-nR + jb = 0 + do j=nC+1,nO + do b=nO+1,nOrb-nR + jb = jb + 1 + do ia=1,nS + rho(p,q,ia) = rho(p,q,ia) + ERI(p,j,q,b)*XpY(ia,jb) + end do + end do + end do + end do + end do + !$OMP END DO + !$OMP END PARALLEL + + endif end subroutine diff --git a/src/GW/RGW_ppBSE.f90 b/src/GW/RGW_ppBSE.f90 index 346e9a2..48bc225 100644 --- a/src/GW/RGW_ppBSE.f90 +++ b/src/GW/RGW_ppBSE.f90 @@ -41,6 +41,8 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS integer :: i_data, supp_data_dbl_size, supp_data_int_size integer :: n_states, n_states_diag + double precision :: tt1, tt2 + double precision,allocatable :: Aph(:,:) double precision,allocatable :: Bph(:,:) @@ -124,24 +126,61 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS allocate(Bpp(nVV,nOO),Cpp(nVV,nVV),Dpp(nOO,nOO)) allocate(KB_sta(nVV,nOO),KC_sta(nVV,nVV),KD_sta(nOO,nOO)) + !print*, 'RGW_ppBSE_static_kernel_C:' + !call wall_time(tt1) call RGW_ppBSE_static_kernel_C(ispin,eta,nOrb,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_C (sec)', tt2-tt1 + + !print*, 'RGW_ppBSE_static_kernel_D:' + !call wall_time(tt1) call RGW_ppBSE_static_kernel_D(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) - if(.not.TDA) call RGW_ppBSE_static_kernel_B(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_D (sec)', tt2-tt1 + + if(.not.TDA) then + !print*, 'RGW_ppBSE_static_kernel_B:' + !call wall_time(tt1) + call RGW_ppBSE_static_kernel_B(ispin,eta,nOrb,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for RGW_ppBSE_static_kernel_B (sec)', tt2-tt1 + endif + !print*, 'ppLR_C:' + !call wall_time(tt1) call ppLR_C(ispin,nOrb,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_C (sec)', tt2-tt1 + + !print*, 'ppLR_D:' + !call wall_time(tt1) call ppLR_D(ispin,nOrb,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) - if(.not.TDA) call ppLR_B(ispin,nOrb,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_D (sec)', tt2-tt1 + + if(.not.TDA) then + !print*, 'ppLR_B:' + !call wall_time(tt1) + call ppLR_B(ispin,nOrb,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for ppLR_B (sec)', tt2-tt1 + endif Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) + !print*, 'ppLR:' + !call wall_time(tt1) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) - deallocate(Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) + !call wall_time(tt2) + !write(*,'(A,1X,F10.3)'), 'wall time for ppLR (sec)', tt2-tt1 - !print*, 'LAPACK:' - !print*, Om2 - !print*, Om1 + deallocate(Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) +! +! print*, 'LAPACK:' +! print*, Om2 +! print*, Om1 ! --- @@ -151,46 +190,46 @@ subroutine RGW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nOrb,nC,nO,nV,nR,nS ! Davidson ! --- - !n_states = nOO + 5 - !n_states_diag = n_states + 4 - !allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) - - !supp_data_int = 1 - !allocate(supp_data_int(supp_data_int_size)) - !supp_data_int(1) = nS - - !supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 - !allocate(supp_data_dbl(supp_data_dbl_size)) - !! scalars - !supp_data_dbl(1) = eta - !i_data = 1 - !! rho_RPA - !do q = 1, nOrb - ! do p = 1, nOrb - ! do m = 1, nS - ! i_data = i_data + 1 - ! supp_data_dbl(i_data) = rho_RPA(p,q,m) - ! enddo - ! enddo - !enddo - !! OmRPA - !do m = 1, nS - ! i_data = i_data + 1 - ! supp_data_dbl(i_data) = OmRPA(m) - !enddo - - !call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & - ! 1.d0, & ! lambda - ! eGW(1), & - ! 0.d0, & ! eF - ! ERI(1,1,1,1), & - ! supp_data_int(1), supp_data_int_size, & - ! supp_data_dbl(1), supp_data_dbl_size, & - ! Om(1), R(1,1), n_states, n_states_diag, "GW") - - !deallocate(Om, R) - !deallocate(supp_data_dbl, supp_data_int) - !stop +! n_states = nOO + 5 +! n_states_diag = n_states + 4 +! allocate(Om(nOO+nVV), R(nOO+nVV,n_states_diag)) +! +! supp_data_int = 1 +! allocate(supp_data_int(supp_data_int_size)) +! supp_data_int(1) = nS +! +! supp_data_dbl_size = nS + nOrb*nOrb*nS + 1 +! allocate(supp_data_dbl(supp_data_dbl_size)) +! ! scalars +! supp_data_dbl(1) = eta +! i_data = 1 +! ! rho_RPA +! do q = 1, nOrb +! do p = 1, nOrb +! do m = 1, nS +! i_data = i_data + 1 +! supp_data_dbl(i_data) = rho_RPA(p,q,m) +! enddo +! enddo +! enddo +! ! OmRPA +! do m = 1, nS +! i_data = i_data + 1 +! supp_data_dbl(i_data) = OmRPA(m) +! enddo +! +! call ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, & +! 1.d0, & ! lambda +! eGW(1), & +! 0.d0, & ! eF +! ERI(1,1,1,1), & +! supp_data_int(1), supp_data_int_size, & +! supp_data_dbl(1), supp_data_dbl_size, & +! Om(1), R(1,1), n_states, n_states_diag, "GW") +! +! deallocate(Om, R) +! deallocate(supp_data_dbl, supp_data_int) +! stop ! --- diff --git a/src/LR/ppLR_GW_davidson.f90 b/src/LR/ppLR_GW_davidson.f90 index 35051f1..a8c1af2 100644 --- a/src/LR/ppLR_GW_davidson.f90 +++ b/src/LR/ppLR_GW_davidson.f90 @@ -33,8 +33,7 @@ subroutine ppLR_GW_HR_calc(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e, eF, call wall_time(t1) - !if((nOO+nVV) .le. 20000) then - if((nOO+nVV) .le. 2) then + if((nOO+nVV) .le. 20000) then call ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda, e(1), eF, n_states_diag, & ERI(1,1,1,1), eta, rho(1,1,1), Om(1), U(1,1), W(1,1))