From 989bd99f17b6d9371520f6e22c597c98b9564b83 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Aug 2024 18:11:39 +0200 Subject: [PATCH 01/15] saving tmp --- src/GT/GTpp_excitation_density.f90 | 9 ++- src/GT/RG0T0pp.f90 | 88 ++++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 13 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index a6ed548..ba32107 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -2,6 +2,7 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! Compute excitation densities for T-matrix self-energy + implicit none ! Input variables @@ -44,6 +45,8 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 1) then + print*, "ispin = ", ispin + !$OMP PARALLEL & !$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) & !$OMP PRIVATE(q,p,ab,cd,kl,ij) & @@ -123,10 +126,11 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 2 .or. ispin == 4) then + print*, "ispin = ", ispin + do q=nC+1,nBas-nR do p=nC+1,nBas-nR -! do ab=1,nVV ab = 0 do a=nO+1,nBas-nR do b=a+1,nBas-nR @@ -153,7 +157,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 end do end do -! do ij=1,nOO ij = 0 do i=nC+1,nO do j=i+1,nO @@ -190,6 +193,8 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 !---------------------------------------------- if(ispin == 3) then + + print*, "ispin = ", ispin !$OMP PARALLEL & !$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) & diff --git a/src/GT/RG0T0pp.f90 b/src/GT/RG0T0pp.f90 index e624bca..2975934 100644 --- a/src/GT/RG0T0pp.f90 +++ b/src/GT/RG0T0pp.f90 @@ -64,6 +64,11 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d double precision,allocatable :: eGT(:) double precision,allocatable :: eGTlin(:) + double precision :: t0, t1 + double precision :: tt0, tt1 + + call wall_time(t0) + ! Output variables ! Hello world @@ -122,11 +127,25 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVs,nOOs),Cpp(nVVs,nVVs),Dpp(nOOs,nOOs)) - if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp) - call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eHF,ERI,Cpp) - call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eHF,ERI,Dpp) + call wall_time(tt0) + call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eHF,ERI,Cpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' + call wall_time(tt0) + call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eHF,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' + + call wall_time(tt0) + if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' + + call wall_time(tt0) call ppLR(TDA_T,nOOs,nVVs,Bpp,Cpp,Dpp,Om1s,X1s,Y1s,Om2s,X2s,Y2s,EcRPA(ispin)) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -145,11 +164,25 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt)) - if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) - call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eHF,ERI,Cpp) - call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eHF,ERI,Dpp) + call wall_time(tt0) + call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eHF,ERI,Cpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' + call wall_time(tt0) + call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eHF,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' + + call wall_time(tt0) + if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' + + call wall_time(tt0) call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin)) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -162,16 +195,25 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d ! iblock = 1 iblock = 3 + + call wall_time(tt0) call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,ERI,X1s,Y1s,rho1s,X2s,Y2s,rho2s) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GTpp_excitation_density = ',tt1-tt0,' seconds' ! iblock = 2 iblock = 4 + + call wall_time(tt0) call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,ERI,X1t,Y1t,rho1t,X2t,Y2t,rho2t) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GTpp_excitation_density = ',tt1-tt0,' seconds' !---------------------------------------------- ! Compute T-matrix version of the self-energy !---------------------------------------------- + call wall_time(tt0) if(regularize) then call GTpp_regularization(nBas,nC,nO,nV,nR,nOOs,nVVs,eHF,Om1s,rho1s,Om2s,rho2s) call GTpp_regularization(nBas,nC,nO,nV,nR,nOOt,nVVt,eHF,Om1t,rho1t,Om2t,rho2t) @@ -179,10 +221,14 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d call GTpp_self_energy_diag(eta,nBas,nC,nO,nV,nR,nOOs,nVVs,nOOt,nVVt,eHF,Om1s,rho1s,Om2s,rho2s, & Om1t,rho1t,Om2t,rho2t,EcGM,Sig,Z) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for self-energy = ',tt1-tt0,' seconds' !---------------------------------------------- ! Solve the quasi-particle equation !---------------------------------------------- + + call wall_time(tt0) eGTlin(:) = eHF(:) + Z(:)*Sig(:) @@ -203,6 +249,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d end if + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time to solve QP = ',tt1-tt0,' seconds' + ! call GTpp_plot_self_energy(nBas,nC,nO,nV,nR,nOOs,nVVs,nOOt,nVVt,eHF,eGT,Om1s,rho1s,Om2s,rho2s, & ! Om1t,rho1t,Om2t,rho2t) @@ -218,9 +267,9 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVs,nOOs),Cpp(nVVs,nVVs),Dpp(nOOs,nOOs)) + call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eGT,ERI,Cpp) + call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eGT,ERI,Dpp) if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp) - call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eGT,ERI,Cpp) - call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eGT,ERI,Dpp) call ppLR(TDA_T,nOOs,nVVs,Bpp,Cpp,Dpp,Om1s,X1s,Y1s,Om2s,X2s,Y2s,EcRPA(ispin)) @@ -232,11 +281,25 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt)) - if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) - call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eGT,ERI,Cpp) - call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eGT,ERI,Dpp) + call wall_time(tt0) + call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eGT,ERI,Cpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' + call wall_time(tt0) + call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eGT,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' + + call wall_time(tt0) + if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' + + call wall_time(tt0) call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin)) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -336,4 +399,7 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d end if + call wall_time(t1) + write(*,'(A65,1X,F9.3,A8)') 'Total Wall time for RG0T0pp = ',t1-t0,' seconds' + end subroutine From 50bfb261ca040712755efa6b44f40f9aee6c6917 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Aug 2024 18:44:03 +0200 Subject: [PATCH 02/15] add OpenMP + Collapse --- src/GT/GTpp_excitation_density.f90 | 214 ++++++++++++++++------------- 1 file changed, 119 insertions(+), 95 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index ba32107..e86a524 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -47,11 +47,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 print*, "ispin = ", ispin - !$OMP PARALLEL & - !$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) & - !$OMP PRIVATE(q,p,ab,cd,kl,ij) & - !$OMP DEFAULT(NONE) - !$OMP DO + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + !$OMP DO COLLAPSE(2) do q=nC+1,nBas-nR do p=nC+1,nBas-nR @@ -116,8 +115,8 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 end do end do - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL end if !---------------------------------------------- @@ -128,63 +127,81 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 print*, "ispin = ", ispin - do q=nC+1,nBas-nR - do p=nC+1,nBas-nR + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + !$OMP DO COLLAPSE(2) + do q = nC+1, nBas-nR + do p = nC+1, nBas-nR ab = 0 - do a=nO+1,nBas-nR - do b=a+1,nBas-nR + + do a = nO+1, nBas-nR + do b = a+1, nBas-nR + ab = ab + 1 cd = 0 - do c=nO+1,nBas-nR - do d=c+1,nBas-nR + do c = nO+1, nBas-nR + do d = c+1, nBas-nR + cd = cd + 1 + rho1(p,q,ab) = rho1(p,q,ab) & + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) - end do - end do + end do ! d + end do ! c kl = 0 - do k=nC+1,nO - do l=k+1,nO + do k = nC+1, nO + do l = k+1, nO + kl = kl + 1 + rho1(p,q,ab) = rho1(p,q,ab) & + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) - end do - end do + end do ! l + end do ! k - end do - end do + end do ! b + end do ! a - ij = 0 - do i=nC+1,nO - do j=i+1,nO + ij = 0 + do i = nC+1, nO + do j = i+1, nO + ij = ij + 1 - + cd = 0 - do c=nO+1,nBas-nR - do d=c+1,nBas-nR + + do c = nO+1, nBas-nR + do d = c+1, nBas-nR + cd = cd + 1 + rho2(p,q,ij) = rho2(p,q,ij) & + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) - end do - end do - + end do ! d + end do ! c + kl = 0 - do k=nC+1,nO - do l=k+1,nO + do k = nC+1, nO + do l = k+1, nO + kl = kl + 1 + rho2(p,q,ij) = rho2(p,q,ij) & + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) - end do - end do - - end do - end do + end do ! l + end do ! k - end do - end do + end do ! j + end do ! i + + end do ! p + end do ! q + !$OMP END DO + !$OMP END PARALLEL end if @@ -196,69 +213,76 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 print*, "ispin = ", ispin - !$OMP PARALLEL & - !$OMP SHARED(nC,nBas,nR,nO,nVV,nOO,rho1,rho2,ERI,X1,Y1,X2,Y2) & - !$OMP PRIVATE(q,p,ab,cd,kl,ij,c,d,k,l) & - !$OMP DEFAULT(NONE) - !$OMP DO + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + !$OMP DO COLLAPSE(2) - do q=nC+1,nBas-nR - do p=nC+1,nBas-nR + do q = nC+1, nBas-nR + do p = nC+1, nBas-nR - ! do ab=1,nVV - ab = 0 - do a=nO+1,nBas-nR - do b=nO+1,nBas-nR - ab = ab + 1 - - cd = 0 - do c=nO+1,nBas-nR - do d=nO+1,nBas-nR - cd = cd + 1 - rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab) - end do - end do - - kl = 0 - do k=nC+1,nO - do l=nC+1,nO - kl = kl + 1 - rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab) - end do - end do + ab = 0 + do a = nO+1, nBas-nR + do b = nO+1, nBas-nR + + ab = ab + 1 + cd = 0 + do c = nO+1, nBas-nR + do d = nO+1, nBas-nR + + cd = cd + 1 + + rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab) end do - end do - - ! do ij=1,nOO - ij = 0 - do i=nC+1,nO - do j=nC+1,nO - ij = ij + 1 - - cd = 0 - do c=nO+1,nBas-nR - do d=nO+1,nBas-nR - cd = cd + 1 - rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij) - end do - end do - - kl = 0 - do k=nC+1,nO - do l=nC+1,nO - kl = kl + 1 - rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij) - end do - end do + end do + kl = 0 + do k = nC+1, nO + do l = nC+1, nO + + kl = kl + 1 + + rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab) end do - end do - + end do + + end do end do - end do - !$OMP END DO - !$OMP END PARALLEL + + ij = 0 + do i = nC+1, nO + do j = nC+1, nO + + ij = ij + 1 + + cd = 0 + do c = nO+1, nBas-nR + do d = nO+1, nBas-nR + + cd = cd + 1 + + rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij) + end do + end do + + kl = 0 + do k = nC+1, nO + do l = nC+1, nO + + kl = kl + 1 + + rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij) + end do + end do + + end do + end do + + end do + end do + !$OMP END DO + !$OMP END PARALLEL end if From 635e7ae457e6f2df8f90663895108e1856142b4d Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Aug 2024 19:22:37 +0200 Subject: [PATCH 03/15] rho1 & rho2: use DGEMM instead of OpenMP --- src/GT/GTpp_excitation_density.f90 | 186 +++++++++++++++++++---------- 1 file changed, 122 insertions(+), 64 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index e86a524..8b4b1c2 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -34,6 +34,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 double precision,intent(out) :: rho1(nBas,nBas,nVV) double precision,intent(out) :: rho2(nBas,nBas,nOO) + integer :: dim_1, dim_2 + double precision, allocatable :: ERI_1(:,:,:) + double precision, allocatable :: ERI_2(:,:,:) + ! Initialization rho1(:,:,:) = 0d0 @@ -209,81 +213,135 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! alpha-beta block !---------------------------------------------- + ! TODO + ! debug for nC & nR + if(ispin == 3) then print*, "ispin = ", ispin - - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & - !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + + dim_1 = (nBas - nO) * (nBas - nO) + dim_2 = nO * nO + + allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2)) + ERI_1 = 0.d0 + ERI_2 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, c, d, cd, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI) !$OMP DO COLLAPSE(2) - do q = nC+1, nBas-nR do p = nC+1, nBas-nR - - ab = 0 - do a = nO+1, nBas-nR - do b = nO+1, nBas-nR - - ab = ab + 1 - - cd = 0 - do c = nO+1, nBas-nR - do d = nO+1, nBas-nR - - cd = cd + 1 - - rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab) - end do - end do - - kl = 0 - do k = nC+1, nO - do l = nC+1, nO - - kl = kl + 1 - - rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab) - end do - end do - + cd = 0 + do c = nO+1, nBas-nR + do d = nO+1, nBas-nR + cd = cd + 1 + ERI_1(p,q,cd) = ERI(p,q,c,d) + enddo + enddo + kl = 0 + do k = nC+1, nO + do l = nC+1, nO + kl = kl + 1 + ERI_2(p,q,kl) = ERI(p,q,k,l) end do end do - - ij = 0 - do i = nC+1, nO - do j = nC+1, nO - - ij = ij + 1 - - cd = 0 - do c = nO+1, nBas-nR - do d = nO+1, nBas-nR - - cd = cd + 1 - - rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij) - end do - end do - - kl = 0 - do k = nC+1, nO - do l = nC+1, nO - - kl = kl + 1 - - rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij) - end do - end do - - end do - end do - - end do - end do + enddo + enddo !$OMP END DO !$OMP END PARALLEL + call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, & + 0.d0, rho1(1,1,1), nBas*nBas) + + call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, & + 1.d0, rho1(1,1,1), nBas*nBas) + + call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, & + 0.d0, rho2(1,1,1), nBas*nBas) + + call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, & + 1.d0, rho2(1,1,1), nBas*nBas) + + deallocate(ERI_1, ERI_2) + + +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & +! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) +! !$OMP DO COLLAPSE(2) +! +! do q = nC+1, nBas-nR +! do p = nC+1, nBas-nR +! +! ab = 0 +! do a = nO+1, nBas-nR +! do b = nO+1, nBas-nR +! +! ab = ab + 1 +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = nO+1, nBas-nR +! +! cd = cd + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab) +! end do +! end do +! +! kl = 0 +! do k = nC+1, nO +! do l = nC+1, nO +! +! kl = kl + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab) +! end do +! end do +! +! end do +! end do +! +! ij = 0 +! do i = nC+1, nO +! do j = nC+1, nO +! +! ij = ij + 1 +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = nO+1, nBas-nR +! +! cd = cd + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij) +! end do +! end do +! +! kl = 0 +! do k = nC+1, nO +! do l = nC+1, nO +! +! kl = kl + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij) +! end do +! end do +! +! end do +! end do +! +! end do +! end do +! !$OMP END DO +! !$OMP END PARALLEL + end if end subroutine From c9fa0470aaea5633056e64da1a1d77152ed515d5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Mon, 19 Aug 2024 19:32:33 +0200 Subject: [PATCH 04/15] rm time printing --- src/GT/GTpp_excitation_density.f90 | 6 --- src/GT/RG0T0pp.f90 | 63 ------------------------------ 2 files changed, 69 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index 8b4b1c2..fe86ef3 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -49,8 +49,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 1) then - print*, "ispin = ", ispin - !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) @@ -129,8 +127,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 2 .or. ispin == 4) then - print*, "ispin = ", ispin - !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) @@ -218,8 +214,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 3) then - print*, "ispin = ", ispin - dim_1 = (nBas - nO) * (nBas - nO) dim_2 = nO * nO diff --git a/src/GT/RG0T0pp.f90 b/src/GT/RG0T0pp.f90 index 2975934..5954adf 100644 --- a/src/GT/RG0T0pp.f90 +++ b/src/GT/RG0T0pp.f90 @@ -64,10 +64,6 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d double precision,allocatable :: eGT(:) double precision,allocatable :: eGTlin(:) - double precision :: t0, t1 - double precision :: tt0, tt1 - - call wall_time(t0) ! Output variables @@ -127,25 +123,11 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVs,nOOs),Cpp(nVVs,nVVs),Dpp(nOOs,nOOs)) - call wall_time(tt0) call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVs,1d0,eHF,ERI,Cpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' - - call wall_time(tt0) call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOs,1d0,eHF,ERI,Dpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,1d0,ERI,Bpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR(TDA_T,nOOs,nVVs,Bpp,Cpp,Dpp,Om1s,X1s,Y1s,Om2s,X2s,Y2s,EcRPA(ispin)) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -164,25 +146,11 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt)) - call wall_time(tt0) call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eHF,ERI,Cpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' - - call wall_time(tt0) call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eHF,ERI,Dpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin)) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -196,24 +164,17 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d ! iblock = 1 iblock = 3 - call wall_time(tt0) call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOs,nVVs,ERI,X1s,Y1s,rho1s,X2s,Y2s,rho2s) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GTpp_excitation_density = ',tt1-tt0,' seconds' ! iblock = 2 iblock = 4 - call wall_time(tt0) call GTpp_excitation_density(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,ERI,X1t,Y1t,rho1t,X2t,Y2t,rho2t) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GTpp_excitation_density = ',tt1-tt0,' seconds' !---------------------------------------------- ! Compute T-matrix version of the self-energy !---------------------------------------------- - call wall_time(tt0) if(regularize) then call GTpp_regularization(nBas,nC,nO,nV,nR,nOOs,nVVs,eHF,Om1s,rho1s,Om2s,rho2s) call GTpp_regularization(nBas,nC,nO,nV,nR,nOOt,nVVt,eHF,Om1t,rho1t,Om2t,rho2t) @@ -221,15 +182,11 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d call GTpp_self_energy_diag(eta,nBas,nC,nO,nV,nR,nOOs,nVVs,nOOt,nVVt,eHF,Om1s,rho1s,Om2s,rho2s, & Om1t,rho1t,Om2t,rho2t,EcGM,Sig,Z) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for self-energy = ',tt1-tt0,' seconds' !---------------------------------------------- ! Solve the quasi-particle equation !---------------------------------------------- - call wall_time(tt0) - eGTlin(:) = eHF(:) + Z(:)*Sig(:) if(linearize) then @@ -249,9 +206,6 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d end if - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time to solve QP = ',tt1-tt0,' seconds' - ! call GTpp_plot_self_energy(nBas,nC,nO,nV,nR,nOOs,nVVs,nOOt,nVVt,eHF,eGT,Om1s,rho1s,Om2s,rho2s, & ! Om1t,rho1t,Om2t,rho2t) @@ -281,25 +235,11 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d allocate(Bpp(nVVt,nOOt),Cpp(nVVt,nVVt),Dpp(nOOt,nOOt)) - call wall_time(tt0) call ppLR_C(iblock,nBas,nC,nO,nV,nR,nVVt,1d0,eGT,ERI,Cpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C = ',tt1-tt0,' seconds' - - call wall_time(tt0) call ppLR_D(iblock,nBas,nC,nO,nV,nR,nOOt,1d0,eGT,ERI,Dpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D = ',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA_T) call ppLR_B(iblock,nBas,nC,nO,nV,nR,nOOt,nVVt,1d0,ERI,Bpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B = ',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR(TDA_T,nOOt,nVVt,Bpp,Cpp,Dpp,Om1t,X1t,Y1t,Om2t,X2t,Y2t,EcRPA(ispin)) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR = ',tt1-tt0,' seconds' deallocate(Bpp,Cpp,Dpp) @@ -399,7 +339,4 @@ subroutine RG0T0pp(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,TDA_T,TDA,dBSE,d end if - call wall_time(t1) - write(*,'(A65,1X,F9.3,A8)') 'Total Wall time for RG0T0pp = ',t1-t0,' seconds' - end subroutine From 4fadf5c1bb98c060a17c8d8fec2ca5bf0ce5de2c Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 21 Aug 2024 09:03:40 +0200 Subject: [PATCH 05/15] OpenMP -> DGEMM for ispin=2,4 in GTpp_excitation_density --- src/GT/GTpp_excitation_density.f90 | 195 +++++++++++++++++++---------- 1 file changed, 127 insertions(+), 68 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index fe86ef3..d793d5c 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -2,6 +2,11 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! Compute excitation densities for T-matrix self-energy + ! TODO + ! debug DGEMM for nC != 0 + ! and nR != 0 + + implicit none @@ -49,6 +54,8 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 1) then + print*, "ispin = ", ispin + !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) @@ -127,81 +134,134 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 2 .or. ispin == 4) then - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & - !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + print*, "ispin = ", ispin + + dim_1 = (nBas - nO) * (nBas - nO - 1) / 2 + dim_2 = nO * (nO - 1) / 2 + + allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2)) + ERI_1 = 0.d0 + ERI_2 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, c, d, cd, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI) !$OMP DO COLLAPSE(2) do q = nC+1, nBas-nR do p = nC+1, nBas-nR - - ab = 0 + cd = 0 + do c = nO+1, nBas-nR + do d = c+1, nBas-nR + cd = cd + 1 + ERI_1(p,q,cd) = ERI(p,q,c,d) - ERI(p,q,d,c) + enddo + enddo + kl = 0 + do k = nC+1, nO + do l = k+1, nO + kl = kl + 1 + ERI_2(p,q,kl) = ERI(p,q,k,l) - ERI(p,q,l,k) + end do + end do + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - do a = nO+1, nBas-nR - do b = a+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, & + 0.d0, rho1(1,1,1), nBas*nBas) - ab = ab + 1 - - cd = 0 - do c = nO+1, nBas-nR - do d = c+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, & + 1.d0, rho1(1,1,1), nBas*nBas) - cd = cd + 1 + call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, & + 0.d0, rho2(1,1,1), nBas*nBas) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO + call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, & + 1.d0, rho2(1,1,1), nBas*nBas) - kl = kl + 1 + deallocate(ERI_1, ERI_2) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) - end do ! l - end do ! k - - end do ! b - end do ! a - - ij = 0 - do i = nC+1, nO - do j = i+1, nO - ij = ij + 1 - - cd = 0 - - do c = nO+1, nBas-nR - do d = c+1, nBas-nR - - cd = cd + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO - - kl = kl + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) - end do ! l - end do ! k - - end do ! j - end do ! i - - end do ! p - end do ! q - !$OMP END DO - !$OMP END PARALLEL +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & +! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) +! !$OMP DO COLLAPSE(2) +! do q = nC+1, nBas-nR +! do p = nC+1, nBas-nR +! +! ab = 0 +! +! do a = nO+1, nBas-nR +! do b = a+1, nBas-nR +! +! ab = ab + 1 +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) +! end do ! l +! end do ! k +! +! end do ! b +! end do ! a +! +! ij = 0 +! do i = nC+1, nO +! do j = i+1, nO +! +! ij = ij + 1 +! +! cd = 0 +! +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) +! end do ! l +! end do ! k +! +! end do ! j +! end do ! i +! +! end do ! p +! end do ! q +! !$OMP END DO +! !$OMP END PARALLEL end if @@ -209,11 +269,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! alpha-beta block !---------------------------------------------- - ! TODO - ! debug for nC & nR - if(ispin == 3) then + print*, "ispin = ", ispin + dim_1 = (nBas - nO) * (nBas - nO) dim_2 = nO * nO From 5a738f00b94d3e6aa53fe7c7eeb606093ae79665 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 21 Aug 2024 09:03:40 +0200 Subject: [PATCH 06/15] OpenMP -> DGEMM for ispin=2,4 in GTpp_excitation_density --- src/GT/GTpp_excitation_density.f90 | 189 ++++++++++++++++++----------- 1 file changed, 121 insertions(+), 68 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index fe86ef3..05f9c2d 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -2,6 +2,11 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! Compute excitation densities for T-matrix self-energy + ! TODO + ! debug DGEMM for nC != 0 + ! and nR != 0 + + implicit none @@ -127,81 +132,132 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 2 .or. ispin == 4) then - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & - !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + dim_1 = (nBas - nO) * (nBas - nO - 1) / 2 + dim_2 = nO * (nO - 1) / 2 + + allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2)) + ERI_1 = 0.d0 + ERI_2 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, c, d, cd, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI) !$OMP DO COLLAPSE(2) do q = nC+1, nBas-nR do p = nC+1, nBas-nR - - ab = 0 + cd = 0 + do c = nO+1, nBas-nR + do d = c+1, nBas-nR + cd = cd + 1 + ERI_1(p,q,cd) = ERI(p,q,c,d) - ERI(p,q,d,c) + enddo + enddo + kl = 0 + do k = nC+1, nO + do l = k+1, nO + kl = kl + 1 + ERI_2(p,q,kl) = ERI(p,q,k,l) - ERI(p,q,l,k) + end do + end do + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - do a = nO+1, nBas-nR - do b = a+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, & + 0.d0, rho1(1,1,1), nBas*nBas) - ab = ab + 1 - - cd = 0 - do c = nO+1, nBas-nR - do d = c+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, & + 1.d0, rho1(1,1,1), nBas*nBas) - cd = cd + 1 + call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, & + 0.d0, rho2(1,1,1), nBas*nBas) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO + call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, & + 1.d0, rho2(1,1,1), nBas*nBas) - kl = kl + 1 + deallocate(ERI_1, ERI_2) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) - end do ! l - end do ! k - - end do ! b - end do ! a - - ij = 0 - do i = nC+1, nO - do j = i+1, nO - ij = ij + 1 - - cd = 0 - - do c = nO+1, nBas-nR - do d = c+1, nBas-nR - - cd = cd + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO - - kl = kl + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) - end do ! l - end do ! k - - end do ! j - end do ! i - - end do ! p - end do ! q - !$OMP END DO - !$OMP END PARALLEL +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & +! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) +! !$OMP DO COLLAPSE(2) +! do q = nC+1, nBas-nR +! do p = nC+1, nBas-nR +! +! ab = 0 +! +! do a = nO+1, nBas-nR +! do b = a+1, nBas-nR +! +! ab = ab + 1 +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) +! end do ! l +! end do ! k +! +! end do ! b +! end do ! a +! +! ij = 0 +! do i = nC+1, nO +! do j = i+1, nO +! +! ij = ij + 1 +! +! cd = 0 +! +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) +! end do ! l +! end do ! k +! +! end do ! j +! end do ! i +! +! end do ! p +! end do ! q +! !$OMP END DO +! !$OMP END PARALLEL end if @@ -209,9 +265,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! alpha-beta block !---------------------------------------------- - ! TODO - ! debug for nC & nR - if(ispin == 3) then dim_1 = (nBas - nO) * (nBas - nO) From a1786b2ade0cb07f023dfa44e12230bed952a786 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 21 Aug 2024 09:13:25 +0200 Subject: [PATCH 07/15] rm print ispin --- src/GT/GTpp_excitation_density.f90 | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index a3be531..05f9c2d 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -54,8 +54,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 1) then - print*, "ispin = ", ispin - !$OMP PARALLEL DEFAULT(NONE) & !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) @@ -269,8 +267,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 3) then - print*, "ispin = ", ispin - dim_1 = (nBas - nO) * (nBas - nO) dim_2 = nO * nO From 992e3dff4b98142130638f597d8cad240e21ba25 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 21 Aug 2024 13:51:12 +0200 Subject: [PATCH 08/15] optim in AdAt --- src/RPA/sort_ppRPA.f90 | 9 ++++---- src/utils/orthogonalization_matrix.f90 | 2 +- src/utils/utils.f90 | 30 +++++++++++++++++++------- src/utils/wrap_lapack.f90 | 4 ++++ 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/RPA/sort_ppRPA.f90 b/src/RPA/sort_ppRPA.f90 index e7037ad..8518ecb 100644 --- a/src/RPA/sort_ppRPA.f90 +++ b/src/RPA/sort_ppRPA.f90 @@ -39,9 +39,10 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2) double precision,intent(out) :: X2(nVV,nOO) double precision,intent(out) :: Y2(nOO,nOO) + ! Memory allocation - allocate(M(nOO+nVV,nOO+nVV),Z1(nOO+nVV,nVV),Z2(nOO+nVV,nOO),order1(nVV),order2(nOO)) + allocate(M(nOO+nVV,nOO+nVV),Z1(nOO+nVV,nVV),Z2(nOO+nVV,nOO),order1(nVV),order2(nOO)) ! Initializatiom @@ -86,7 +87,7 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2) end if - end do + end do if(minval(Om1) < 0d0 .or. ab /= nVV) call print_warning('You may have instabilities in pp-RPA!!') if(maxval(Om2) > 0d0 .or. ij /= nOO) call print_warning('You may have instabilities in pp-RPA!!') @@ -111,7 +112,8 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2) call quick_sort(Om2,order2,nOO) call set_order(Z2,order2,nOO+nVV,nOO) - end if + end if + ! Orthogonalize eigenvectors @@ -202,7 +204,6 @@ subroutine sort_ppRPA(nOO,nVV,Om,Z,Om1,X1,Y1,Om2,X2,Y2) if(nVV > 0) call dgemm ('N', 'N', nOO+nVV, nVV, nOO+nVV, 1d0, M, nOO+nVV, Z1, nOO+nVV, 0d0, tmp1, nOO+nVV) if(nVV > 0) call dgemm ('T', 'N', nVV , nVV, nOO+nVV, 1d0, Z1, nOO+nVV, tmp1, nOO+nVV, 0d0, S1, nVV) - if(nOO > 0) call dgemm ('N', 'N', nOO+nVV, nOO, nOO+nVV, 1d0, M, nOO+nVV, -1d0*Z2, nOO+nVV, 0d0, tmp2, nOO+nVV) if(nOO > 0) call dgemm ('T', 'N', nOO , nOO, nOO+nVV, 1d0, Z2, nOO+nVV, tmp2, nOO+nVV, 0d0, S2, nOO) diff --git a/src/utils/orthogonalization_matrix.f90 b/src/utils/orthogonalization_matrix.f90 index 176862a..d7e0089 100644 --- a/src/utils/orthogonalization_matrix.f90 +++ b/src/utils/orthogonalization_matrix.f90 @@ -54,7 +54,7 @@ subroutine orthogonalization_matrix(nBas,S,X) end do - call ADAt(nBas,Uvec,Uval,X) + call ADAt(nBas, Uvec(1,1), Uval(1), X(1,1)) elseif(ortho_type == 2) then diff --git a/src/utils/utils.f90 b/src/utils/utils.f90 index efd3087..80b8322 100644 --- a/src/utils/utils.f90 +++ b/src/utils/utils.f90 @@ -375,15 +375,29 @@ subroutine ADAt(N,A,D,B) double precision,intent(out) :: B(N,N) - B = 0d0 + double precision, allocatable :: tmp(:,:) - do i=1,N - do j=1,N - do k=1,N - B(i,k) = B(i,k) + A(i,j)*D(j)*A(k,j) - end do - end do - end do + allocate(tmp(N,N)) + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(i, j) SHARED(N, A, D, tmp) + !$OMP DO + do i = 1, N + do j = 1, N + tmp(i,j) = D(i) * A(j,i) + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + call dgemm("N", "N", N, N, N, 1.d0, A(1,1), N, tmp(1,1), N, 0.d0, B(1,1), N) + deallocate(tmp) + +! B = 0d0 +! do i=1,N +! do j=1,N +! do k=1,N +! B(i,k) = B(i,k) + A(i,j)*D(j)*A(k,j) +! end do +! end do +! end do end subroutine !------------------------------------------------------------------------ diff --git a/src/utils/wrap_lapack.f90 b/src/utils/wrap_lapack.f90 index d63b722..86280fe 100644 --- a/src/utils/wrap_lapack.f90 +++ b/src/utils/wrap_lapack.f90 @@ -31,6 +31,8 @@ subroutine diagonalize_general_matrix(N,A,WR,VR) call dgeev('V','V',N,A,N,WR,WI,VL,N,VR,N,work,lwork,info) + deallocate(work, WI, VL) + if(info /= 0) then print*,'Problem in diagonalize_general_matrix (dgeev)!!' end if @@ -67,6 +69,8 @@ subroutine diagonalize_matrix(N,A,e) allocate(work(lwork)) call dsyev('V','U',N,A,N,e,work,lwork,info) + + deallocate(work) if(info /= 0) then print*,'Problem in diagonalize_matrix (dsyev)!!' From bb4729192ef9dbc6175826129fd9d990bfa26261 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 15:23:08 +0200 Subject: [PATCH 09/15] GW_ppBSE_static_kernel_C for ispin = 1 --- src/GW/GW_ppBSE.f90 | 109 ++++++++++++++++++++++++---- src/GW/GW_ppBSE_static_kernel_C.f90 | 92 +++++++++++++++++------ src/GW/RG0W0.f90 | 56 +++++++++++++- 3 files changed, 221 insertions(+), 36 deletions(-) diff --git a/src/GW/GW_ppBSE.f90 b/src/GW/GW_ppBSE.f90 index c1d6daa..e26b2e1 100644 --- a/src/GW/GW_ppBSE.f90 +++ b/src/GW/GW_ppBSE.f90 @@ -66,6 +66,11 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, double precision,intent(out) :: EcBSE(nspin) + double precision :: t0, t1 + double precision :: tt0, tt1 + + call wall_time(t0) + !--------------------------------- ! Compute (singlet) RPA screening !--------------------------------- @@ -76,11 +81,25 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, allocate(OmRPA(nS),XpY_RPA(nS,nS),XmY_RPA(nS,nS),rho_RPA(nBas,nBas,nS), & Aph(nS,nS),Bph(nS,nS)) - call phLR_A(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,eW,ERI,Aph) - if(.not.TDA_W) call phLR_B(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) + call wall_time(tt0) + call phLR_A(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,eW,ERI,Aph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' + call wall_time(tt0) + if(.not.TDA_W) call phLR_B(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' + + call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,OmRPA,XpY_RPA,XmY_RPA) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' + + call wall_time(tt0) call GW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY_RPA,rho_RPA) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_excitation_density =',tt1-tt0,' seconds' deallocate(XpY_RPA,XmY_RPA,Aph,Bph) @@ -108,32 +127,63 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, ! Compute BSE excitation energies - if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) - call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) - call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) + call wall_time(tt0) + call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_C =',tt1-tt0,' seconds' + call wall_time(tt0) + call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_D =',tt1-tt0,' seconds' + + call wall_time(tt0) + if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_B =',tt1-tt0,' seconds' + + call wall_time(tt0) + call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C =',tt1-tt0,' seconds' + + call wall_time(tt0) + call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D =',tt1-tt0,' seconds' + + call wall_time(tt0) if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) - call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) - call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B =',tt1-tt0,' seconds' Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) + call wall_time(tt0) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR =',tt1-tt0,' seconds' + call wall_time(tt0) call ppLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nOO,nVV,dipole_int,Om1,X1,Y1,Om2,X2,Y2) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_transition_vectors =',tt1-tt0,' seconds' !----------------------------------------------------! ! Compute the dynamical screening at the ppBSE level ! !----------------------------------------------------! + call wall_time(tt0) if(dBSE) & call GW_ppBSE_dynamic_perturbation(ispin,dTDA,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,eW,eGW,ERI,dipole_int,OmRPA,rho_RPA, & Om1,X1,Y1,Om2,X2,Y2,KB_sta,KC_sta,KD_sta) - write(*,*) "Deallocate not done" + + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_dynamic_perturbation =',tt1-tt0,' seconds' + deallocate(Om1,X1,Y1,Om2,X2,Y2,Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) - write(*,*) "Deallocate done" end if !------------------- @@ -160,33 +210,66 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, ! Compute BSE excitation energies + call wall_time(tt0) + call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_C =',tt1-tt0,' seconds' + + call wall_time(tt0) + call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_D =',tt1-tt0,' seconds' + + call wall_time(tt0) if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) - call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) - call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_B =',tt1-tt0,' seconds' + call wall_time(tt0) + call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C =',tt1-tt0,' seconds' + call wall_time(tt0) + call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D =',tt1-tt0,' seconds' + + call wall_time(tt0) if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) - call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) - call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B =',tt1-tt0,' seconds' Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) + call wall_time(tt0) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR =',tt1-tt0,' seconds' + call wall_time(tt0) call ppLR_transition_vectors(.false.,nBas,nC,nO,nV,nR,nOO,nVV,dipole_int,Om1,X1,Y1,Om2,X2,Y2) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_transition_vectors =',tt1-tt0,' seconds' !----------------------------------------------------! ! Compute the dynamical screening at the ppBSE level ! !----------------------------------------------------! + call wall_time(tt0) if(dBSE) & call GW_ppBSE_dynamic_perturbation(ispin,dTDA,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,eW,eGW,ERI,dipole_int,OmRPA,rho_RPA, & Om1,X1,Y1,Om2,X2,Y2,KB_sta,KC_sta,KD_sta) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_dynamic_perturbation =',tt1-tt0,' seconds' deallocate(Om1,X1,Y1,Om2,X2,Y2,Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) end if + call wall_time(t1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE =',t1-t0,' seconds' + end subroutine diff --git a/src/GW/GW_ppBSE_static_kernel_C.f90 b/src/GW/GW_ppBSE_static_kernel_C.f90 index ef21825..dfc9c75 100644 --- a/src/GW/GW_ppBSE_static_kernel_C.f90 +++ b/src/GW/GW_ppBSE_static_kernel_C.f90 @@ -26,44 +26,94 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI double precision,external :: Kronecker_delta double precision :: chi double precision :: eps + double precision :: tmp_ab, lambda4, eta2 integer :: a,b,c,d,ab,cd,m + integer :: a0, aa + + double precision, allocatable :: Om_tmp(:) ! Output variables double precision,intent(out) :: KC(nVV,nVV) -! Initialization - - KC(:,:) = 0d0 - !---------------! ! Singlet block ! !---------------! if(ispin == 1) then - ab = 0 - do a=nO+1,nBas-nR - do b=a,nBas-nR - ab = ab + 1 + a0 = nBas - nR - nO + lambda4 = 4.d0 * lambda + eta2 = eta * eta + + allocate(Om_tmp(nS)) + + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) + !$OMP DO + do m = 1, nS + Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + !$OMP DO + do a = nO+1, nBas-nR + aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO + do b = a, nBas-nR + ab = aa + b + + tmp_ab = lambda4 + if(a .eq. b) then + tmp_ab = 0.7071067811865475d0 * lambda4 + endif + cd = 0 - do c=nO+1,nBas-nR - do d=c,nBas-nR + do c = nO + 1, nBas-nR + do d = c, nBas-nR cd = cd + 1 - chi = 0d0 - do m=1,nS - eps = Om(m)**2 + eta**2 - chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & - - rho(a,d,m)*rho(b,c,m)*Om(m)/eps - end do + KC(ab,cd) = 0d0 + do m = 1, nS + KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & + - rho(a,d,m) * rho(b,c,m) * Om_tmp(m) + end do - KC(ab,cd) = 4d0*lambda*chi/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d))) + KC(ab,cd) = tmp_ab * KC(ab,cd) + if(c .eq. d) then + KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd) + endif + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - end do - end do - end do - end do +! ab = 0 +! do a=nO+1,nBas-nR +! do b=a,nBas-nR +! ab = ab + 1 +! cd = 0 +! do c=nO+1,nBas-nR +! do d=c,nBas-nR +! cd = cd + 1 +! +! chi = 0d0 +! do m=1,nS +! eps = Om(m)**2 + eta**2 +! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & +! - rho(a,d,m)*rho(b,c,m)*Om(m)/eps +! end do +! +! KC(ab,cd) = 4d0*lambda*chi/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d))) +! +! end do +! end do +! end do +! end do end if diff --git a/src/GW/RG0W0.f90 b/src/GW/RG0W0.f90 index fc30b5d..1bd18e0 100644 --- a/src/GW/RG0W0.f90 +++ b/src/GW/RG0W0.f90 @@ -59,6 +59,11 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA double precision,allocatable :: eGWlin(:) double precision,allocatable :: eGW(:) + double precision :: t0, t1 + double precision :: tt0, tt1 + + call wall_time(t0) + ! Output variables ! Hello world @@ -101,26 +106,48 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Compute screening ! !-------------------! - call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eHF,ERI,Aph) + call wall_time(tt0) + call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eHF,ERI,Aph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' + + call wall_time(tt0) if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' + call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' + call wall_time(tt0) if(print_W) call print_excitation_energies('phRPA@RHF','singlet',nS,Om) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for print_excitation_energies =',tt1-tt0,' seconds' !--------------------------! ! Compute spectral weights ! !--------------------------! + call wall_time(tt0) call GW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_excitation_density =',tt1-tt0,' seconds' !------------------------! ! Compute GW self-energy ! !------------------------! + call wall_time(tt0) if(regularize) call GW_regularization(nBas,nC,nO,nV,nR,nS,eHF,Om,rho) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_regularization =',tt1-tt0,' seconds' + call wall_time(tt0) call GW_self_energy_diag(eta,nBas,nC,nO,nV,nR,nS,eHF,Om,rho,EcGM,SigC,Z) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_self_energy_diag =',tt1-tt0,' seconds' !-----------------------------------! ! Solve the quasi-particle equation ! @@ -128,6 +155,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Linearized or graphical solution? + call wall_time(tt0) eGWlin(:) = eHF(:) + Z(:)*SigC(:) if(linearize) then @@ -145,6 +173,8 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA call GW_QP_graph(eta,nBas,nC,nO,nV,nR,nS,eHF,Om,rho,eGWlin,eHF,eGW,Z) end if + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for QP =',tt1-tt0,' seconds' ! Plot self-energy, renormalization factor, and spectral function @@ -158,19 +188,33 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Compute the RPA correlation energy + call wall_time(tt0) call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eGW,ERI,Aph) - if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' + call wall_time(tt0) + if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' + + call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' !--------------! ! Dump results ! !--------------! + call wall_time(tt0) call print_RG0W0(nBas,nO,eHF,ENuc,ERHF,SigC,Z,eGW,EcRPA,EcGM) + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for print_RG0W0 =',tt1-tt0,' seconds' ! Perform BSE calculation + call wall_time(tt0) if(dophBSE) then call GW_phBSE(dophBSE2,TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,ERI,dipole_int,eHF,eGW,EcBSE) @@ -221,7 +265,10 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA end if end if + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for phBSE =',tt1-tt0,' seconds' + call wall_time(tt0) if(doppBSE) then call GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,ERI,dipole_int,eHF,eGW,EcBSE) @@ -238,6 +285,8 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA write(*,*) end if + call wall_time(tt1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppBSE =',tt1-tt0,' seconds' ! end if @@ -251,4 +300,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA end if + call wall_time(t1) + write(*,'(A65,1X,F9.3,A8)') 'Wall time for RG0W0 =',t1-t0,' seconds' + end subroutine From af65a4d69c4d4198177dbb84f363bafdd7471fc5 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 16:44:16 +0200 Subject: [PATCH 10/15] GW_ppBSE_static_kernel_C for ispin = 2 --- src/GW/GW_ppBSE_static_kernel_C.f90 | 82 ++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/src/GW/GW_ppBSE_static_kernel_C.f90 b/src/GW/GW_ppBSE_static_kernel_C.f90 index dfc9c75..534654c 100644 --- a/src/GW/GW_ppBSE_static_kernel_C.f90 +++ b/src/GW/GW_ppBSE_static_kernel_C.f90 @@ -71,7 +71,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI endif cd = 0 - do c = nO + 1, nBas-nR + do c = nO+1, nBas-nR do d = c, nBas-nR cd = cd + 1 @@ -92,6 +92,8 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI !$OMP END DO !$OMP END PARALLEL + deallocate(Om_tmp) + ! ab = 0 ! do a=nO+1,nBas-nR ! do b=a,nBas-nR @@ -123,28 +125,72 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI if(ispin == 2) then - ab = 0 - do a=nO+1,nBas-nR - do b=a+1,nBas-nR - ab = ab + 1 + a0 = nBas - nR - nO - 1 + lambda4 = 4.d0 * lambda + eta2 = eta * eta + + allocate(Om_tmp(nS)) + + !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) + !$OMP DO + do m = 1, nS + Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + enddo + !$OMP END DO + !$OMP END PARALLEL + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + !$OMP DO + do a = nO+1, nBas-nR + aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1 + do b = a+1, nBas-nR + ab = aa + b + cd = 0 - do c=nO+1,nBas-nR - do d=c+1,nBas-nR + do c = nO+1, nBas-nR + do d = c+1, nBas-nR cd = cd + 1 - chi = 0d0 - do m=1,nS - eps = Om(m)**2 + eta**2 - chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & - + rho(a,d,m)*rho(b,c,m)*Om(m)/eps + KC(ab,cd) = 0d0 + do m = 1, nS + KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & + + rho(a,d,m) * rho(b,c,m) * Om_tmp(m) end do - - KC(ab,cd) = 4d0*lambda*chi - end do - end do - end do - end do + KC(ab,cd) = lambda4 * KC(ab,cd) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + deallocate(Om_tmp) + +! ab = 0 +! do a=nO+1,nBas-nR +! do b=a+1,nBas-nR +! ab = ab + 1 +! cd = 0 +! do c=nO+1,nBas-nR +! do d=c+1,nBas-nR +! cd = cd + 1 +! +! chi = 0d0 +! do m=1,nS +! eps = Om(m)**2 + eta**2 +! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & +! + rho(a,d,m)*rho(b,c,m)*Om(m)/eps +! end do +! +! KC(ab,cd) = 4d0*lambda*chi +! +! end do +! end do +! end do +! end do end if From 8e26c3582622d2cfb7c9b093ce85dccd19ca2fd3 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 16:48:15 +0200 Subject: [PATCH 11/15] added Olympe comf --- src/make_ninja.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/make_ninja.py b/src/make_ninja.py index 1ac16ed..1e72639 100755 --- a/src/make_ninja.py +++ b/src/make_ninja.py @@ -76,10 +76,21 @@ STDCXX=-lstdc++ FIX_ORDER_OF_LIBS=-Wl,--start-group """ +compile_olympe = """ +FC = ifort -mkl=parallel -qopenmp +AR = ar crs +FFLAGS = -I$IDIR -Ofast -traceback -xCORE-AVX512 +CC = icc +CXX = icpc +LAPACK= +STDCXX=-lstdc++ +FIX_ORDER_OF_LIBS=-Wl,--start-group +""" if sys.platform in ["linux", "linux2"]: - compiler = compile_gfortran_linux +# compiler = compile_gfortran_linux # compiler = compile_ifort_linux + compiler = compile_olympe elif sys.platform == "darwin": compiler = compile_gfortran_mac else: From 14e95287f62b204412cadd98f83fae15da5ad34a Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 18:15:11 +0200 Subject: [PATCH 12/15] OpenMP in ppLR_C for ispin = 1 --- src/LR/ppLR_C.f90 | 76 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/src/LR/ppLR_C.f90 b/src/LR/ppLR_C.f90 index c74b2dd..72f15f0 100644 --- a/src/LR/ppLR_C.f90 +++ b/src/LR/ppLR_C.f90 @@ -23,6 +23,8 @@ subroutine ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,lambda,e,ERI,Cpp) double precision,external :: Kronecker_delta integer :: a,b,c,d,ab,cd + integer :: a0, aa + double precision :: e_ab, tmp_ab, delta_ac, tmp_cd ! Output variables @@ -37,22 +39,70 @@ subroutine ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,lambda,e,ERI,Cpp) if(ispin == 1) then - ab = 0 - do a=nO+1,nBas-nR - do b=a,nBas-nR - ab = ab + 1 + a0 = nBas - nR - nO + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd, e_ab, tmp_ab, delta_ac, tmp_cd) & + !$OMP SHARED(nO, nBas, nR, a0, eF, lambda, e, ERI, Cpp) + !$OMP DO + do a = nO+1, nBas-nR + aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO + do b = a, nBas-nR + ab = aa + b + + e_ab = e(a) + e(b) - eF + + tmp_ab = lambda + if(a .eq. b) then + tmp_ab = 0.7071067811865475d0 * lambda + endif + cd = 0 - do c=nO+1,nBas-nR - do d=c,nBas-nR + do c = nO+1, nBas-nR + + delta_ac = 0.d0 + if(a .eq. c) then + delta_ac = 1.d0 + endif + + do d = c, nBas-nR cd = cd + 1 + + tmp_cd = tmp_ab + if(c .eq. d) then + tmp_cd = 0.7071067811865475d0 * tmp_ab + endif + + Cpp(ab,cd) = 0.d0 + if(b .eq. d) then + Cpp(ab,cd) = e_ab * delta_ac + endif - Cpp(ab,cd) = + (e(a) + e(b) - eF)*Kronecker_delta(a,c)*Kronecker_delta(b,d) & - + lambda*(ERI(a,b,c,d) + ERI(a,b,d,c))/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d))) - - end do - end do - end do - end do + Cpp(ab,cd) = Cpp(ab,cd) + tmp_cd * (ERI(a,b,c,d) + ERI(a,b,d,c)) + enddo + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + +! ab = 0 +! do a=nO+1,nBas-nR +! do b=a,nBas-nR +! ab = ab + 1 +! cd = 0 +! do c=nO+1,nBas-nR +! do d=c,nBas-nR +! cd = cd + 1 +! +! Cpp(ab,cd) = + (e(a) + e(b) - eF)*Kronecker_delta(a,c)*Kronecker_delta(b,d) & +! + lambda*(ERI(a,b,c,d) + ERI(a,b,d,c))/sqrt((1d0 + Kronecker_delta(a,b))*(1d0 + Kronecker_delta(c,d))) +! +! end do +! end do +! end do +! end do end if From adf05c13eef8434fa375a8cb0cf5595d9f1edd51 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 18:15:47 +0200 Subject: [PATCH 13/15] OpenMP -> DGEMM in GW_ppBSE_static_kernel_C, ispin=1,2 --- src/GW/GW_ppBSE_static_kernel_C.f90 | 210 +++++++++++++++++++++++----- 1 file changed, 176 insertions(+), 34 deletions(-) diff --git a/src/GW/GW_ppBSE_static_kernel_C.f90 b/src/GW/GW_ppBSE_static_kernel_C.f90 index 534654c..2df0025 100644 --- a/src/GW/GW_ppBSE_static_kernel_C.f90 +++ b/src/GW/GW_ppBSE_static_kernel_C.f90 @@ -31,6 +31,8 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI integer :: a0, aa double precision, allocatable :: Om_tmp(:) + double precision, allocatable :: tmp_m(:,:,:) + double precision, allocatable :: tmp(:,:,:,:) ! Output variables @@ -46,19 +48,27 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI lambda4 = 4.d0 * lambda eta2 = eta * eta - allocate(Om_tmp(nS)) + allocate(tmp_m(nBas,nBas,nS)) + allocate(tmp(nBas,nBas,nBas,nBas)) - !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) - !$OMP DO do m = 1, nS - Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + eps = Om(m) / (Om(m)*Om(m) + eta2) + do c = 1, nBas + do a = 1, nBas + tmp_m(a,c,m) = eps * rho(a,c,m) + enddo + enddo enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) & - !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & + tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, & + 0.d0, tmp(1,1,1,1), nBas*nBas) + + deallocate(tmp_m) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd, tmp_ab) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC) !$OMP DO do a = nO+1, nBas-nR aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO @@ -75,13 +85,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI do d = c, nBas-nR cd = cd + 1 - KC(ab,cd) = 0d0 - do m = 1, nS - KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & - - rho(a,d,m) * rho(b,c,m) * Om_tmp(m) - end do - - KC(ab,cd) = tmp_ab * KC(ab,cd) + KC(ab,cd) = -tmp_ab * (tmp(a,c,b,d) + tmp(a,d,b,c)) if(c .eq. d) then KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd) endif @@ -92,8 +96,87 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI !$OMP END DO !$OMP END PARALLEL - deallocate(Om_tmp) + deallocate(tmp) + +! do a=nO+1,nBas-nR +! do b=a,nBas-nR +! ab = ab + 1 +! cd = 0 +! do c=nO+1,nBas-nR +! do d=c,nBas-nR +! cd = cd + 1 +! +! chi = 0d0 +! do m=1,nS +! eps = Om(m)**2 + eta**2 +! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & +! - rho(a,d,m)*rho(b,c,m)*Om(m)/eps +! end do + + +! --- --- --- +! OpenMP implementation +! --- --- --- +! +! a0 = nBas - nR - nO +! lambda4 = 4.d0 * lambda +! eta2 = eta * eta +! +! allocate(Om_tmp(nS)) +! +! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) +! !$OMP DO +! do m = 1, nS +! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) & +! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) +! !$OMP DO +! do a = nO+1, nBas-nR +! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO +! do b = a, nBas-nR +! ab = aa + b +! +! tmp_ab = lambda4 +! if(a .eq. b) then +! tmp_ab = 0.7071067811865475d0 * lambda4 +! endif +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c, nBas-nR +! cd = cd + 1 +! +! KC(ab,cd) = 0d0 +! do m = 1, nS +! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & +! - rho(a,d,m) * rho(b,c,m) * Om_tmp(m) +! end do +! +! KC(ab,cd) = tmp_ab * KC(ab,cd) +! if(c .eq. d) then +! KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd) +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! deallocate(Om_tmp) +! --- --- --- + + +! --- --- --- +! Naive implementation +! --- --- --- +! ! ab = 0 ! do a=nO+1,nBas-nR ! do b=a,nBas-nR @@ -116,6 +199,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI ! end do ! end do ! end do +! --- --- --- end if @@ -129,19 +213,27 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI lambda4 = 4.d0 * lambda eta2 = eta * eta - allocate(Om_tmp(nS)) + allocate(tmp_m(nBas,nBas,nS)) + allocate(tmp(nBas,nBas,nBas,nBas)) - !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) - !$OMP DO do m = 1, nS - Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + eps = Om(m) / (Om(m)*Om(m) + eta2) + do c = 1, nBas + do a = 1, nBas + tmp_m(a,c,m) = eps * rho(a,c,m) + enddo + enddo enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) & - !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & + tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, & + 0.d0, tmp(1,1,1,1), nBas*nBas) + + deallocate(tmp_m) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC) !$OMP DO do a = nO+1, nBas-nR aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1 @@ -153,13 +245,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI do d = c+1, nBas-nR cd = cd + 1 - KC(ab,cd) = 0d0 - do m = 1, nS - KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & - + rho(a,d,m) * rho(b,c,m) * Om_tmp(m) - end do - - KC(ab,cd) = lambda4 * KC(ab,cd) + KC(ab,cd) = lambda4 * (-tmp(a,c,b,d) + tmp(a,d,b,c)) enddo enddo enddo @@ -167,8 +253,63 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI !$OMP END DO !$OMP END PARALLEL - deallocate(Om_tmp) + deallocate(tmp) + +! --- --- --- +! OpenMP implementation +! --- --- --- +! +! a0 = nBas - nR - nO - 1 +! lambda4 = 4.d0 * lambda +! eta2 = eta * eta +! +! allocate(Om_tmp(nS)) +! +! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) +! !$OMP DO +! do m = 1, nS +! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) & +! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) +! !$OMP DO +! do a = nO+1, nBas-nR +! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1 +! do b = a+1, nBas-nR +! ab = aa + b +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! cd = cd + 1 +! +! KC(ab,cd) = 0d0 +! do m = 1, nS +! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & +! + rho(a,d,m) * rho(b,c,m) * Om_tmp(m) +! end do +! +! KC(ab,cd) = lambda4 * KC(ab,cd) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! deallocate(Om_tmp) +! --- --- --- + + +! --- --- --- +! Naive implementation +! --- --- --- +! ! ab = 0 ! do a=nO+1,nBas-nR ! do b=a+1,nBas-nR @@ -191,6 +332,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI ! end do ! end do ! end do +! --- --- --- end if From 675fc77acc40253ff88d3ab276d6c88278a10984 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 18:27:52 +0200 Subject: [PATCH 14/15] rm timing --- src/GW/GW_ppBSE.f90 | 81 --------------------------------------------- src/GW/RG0W0.f90 | 49 --------------------------- 2 files changed, 130 deletions(-) diff --git a/src/GW/GW_ppBSE.f90 b/src/GW/GW_ppBSE.f90 index e26b2e1..371836d 100644 --- a/src/GW/GW_ppBSE.f90 +++ b/src/GW/GW_ppBSE.f90 @@ -66,10 +66,6 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, double precision,intent(out) :: EcBSE(nspin) - double precision :: t0, t1 - double precision :: tt0, tt1 - - call wall_time(t0) !--------------------------------- ! Compute (singlet) RPA screening @@ -81,25 +77,13 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, allocate(OmRPA(nS),XpY_RPA(nS,nS),XmY_RPA(nS,nS),rho_RPA(nBas,nBas,nS), & Aph(nS,nS),Bph(nS,nS)) - call wall_time(tt0) call phLR_A(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,eW,ERI,Aph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' - call wall_time(tt0) if(.not.TDA_W) call phLR_B(isp_W,dRPA_W,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' - call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,OmRPA,XpY_RPA,XmY_RPA) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' - call wall_time(tt0) call GW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY_RPA,rho_RPA) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_excitation_density =',tt1-tt0,' seconds' deallocate(XpY_RPA,XmY_RPA,Aph,Bph) @@ -127,61 +111,30 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, ! Compute BSE excitation energies - call wall_time(tt0) call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_C =',tt1-tt0,' seconds' - - call wall_time(tt0) call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_D =',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_B =',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C =',tt1-tt0,' seconds' - - call wall_time(tt0) call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D =',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B =',tt1-tt0,' seconds' Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) - call wall_time(tt0) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR =',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR_transition_vectors(.true.,nBas,nC,nO,nV,nR,nOO,nVV,dipole_int,Om1,X1,Y1,Om2,X2,Y2) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_transition_vectors =',tt1-tt0,' seconds' !----------------------------------------------------! ! Compute the dynamical screening at the ppBSE level ! !----------------------------------------------------! - call wall_time(tt0) if(dBSE) & call GW_ppBSE_dynamic_perturbation(ispin,dTDA,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,eW,eGW,ERI,dipole_int,OmRPA,rho_RPA, & Om1,X1,Y1,Om2,X2,Y2,KB_sta,KC_sta,KD_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_dynamic_perturbation =',tt1-tt0,' seconds' deallocate(Om1,X1,Y1,Om2,X2,Y2,Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) end if @@ -210,66 +163,32 @@ subroutine GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS, ! Compute BSE excitation energies - call wall_time(tt0) call GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,1d0,ERI,OmRPA,rho_RPA,KC_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_C =',tt1-tt0,' seconds' - - call wall_time(tt0) call GW_ppBSE_static_kernel_D(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,1d0,ERI,OmRPA,rho_RPA,KD_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_D =',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA) call GW_ppBSE_static_kernel_B(ispin,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,1d0,ERI,OmRPA,rho_RPA,KB_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_static_kernel_B =',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR_C(ispin,nBas,nC,nO,nV,nR,nVV,1d0,eGW,ERI,Cpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_C =',tt1-tt0,' seconds' - - call wall_time(tt0) call ppLR_D(ispin,nBas,nC,nO,nV,nR,nOO,1d0,eGW,ERI,Dpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_D =',tt1-tt0,' seconds' - - call wall_time(tt0) if(.not.TDA) call ppLR_B(ispin,nBas,nC,nO,nV,nR,nOO,nVV,1d0,ERI,Bpp) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_B =',tt1-tt0,' seconds' Bpp(:,:) = Bpp(:,:) + KB_sta(:,:) Cpp(:,:) = Cpp(:,:) + KC_sta(:,:) Dpp(:,:) = Dpp(:,:) + KD_sta(:,:) - call wall_time(tt0) call ppLR(TDA,nOO,nVV,Bpp,Cpp,Dpp,Om1,X1,Y1,Om2,X2,Y2,EcBSE(ispin)) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR =',tt1-tt0,' seconds' - call wall_time(tt0) call ppLR_transition_vectors(.false.,nBas,nC,nO,nV,nR,nOO,nVV,dipole_int,Om1,X1,Y1,Om2,X2,Y2) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppLR_transition_vectors =',tt1-tt0,' seconds' !----------------------------------------------------! ! Compute the dynamical screening at the ppBSE level ! !----------------------------------------------------! - call wall_time(tt0) if(dBSE) & call GW_ppBSE_dynamic_perturbation(ispin,dTDA,eta,nBas,nC,nO,nV,nR,nS,nOO,nVV,eW,eGW,ERI,dipole_int,OmRPA,rho_RPA, & Om1,X1,Y1,Om2,X2,Y2,KB_sta,KC_sta,KD_sta) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE_dynamic_perturbation =',tt1-tt0,' seconds' deallocate(Om1,X1,Y1,Om2,X2,Y2,Bpp,Cpp,Dpp,KB_sta,KC_sta,KD_sta) end if - call wall_time(t1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_ppBSE =',t1-t0,' seconds' - end subroutine diff --git a/src/GW/RG0W0.f90 b/src/GW/RG0W0.f90 index 1bd18e0..65ffb4d 100644 --- a/src/GW/RG0W0.f90 +++ b/src/GW/RG0W0.f90 @@ -59,10 +59,6 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA double precision,allocatable :: eGWlin(:) double precision,allocatable :: eGW(:) - double precision :: t0, t1 - double precision :: tt0, tt1 - - call wall_time(t0) ! Output variables @@ -106,48 +102,27 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Compute screening ! !-------------------! - call wall_time(tt0) call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eHF,ERI,Aph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' - call wall_time(tt0) if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' - call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' - call wall_time(tt0) if(print_W) call print_excitation_energies('phRPA@RHF','singlet',nS,Om) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for print_excitation_energies =',tt1-tt0,' seconds' !--------------------------! ! Compute spectral weights ! !--------------------------! - call wall_time(tt0) call GW_excitation_density(nBas,nC,nO,nR,nS,ERI,XpY,rho) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_excitation_density =',tt1-tt0,' seconds' !------------------------! ! Compute GW self-energy ! !------------------------! - call wall_time(tt0) if(regularize) call GW_regularization(nBas,nC,nO,nV,nR,nS,eHF,Om,rho) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_regularization =',tt1-tt0,' seconds' - call wall_time(tt0) call GW_self_energy_diag(eta,nBas,nC,nO,nV,nR,nS,eHF,Om,rho,EcGM,SigC,Z) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for GW_self_energy_diag =',tt1-tt0,' seconds' !-----------------------------------! ! Solve the quasi-particle equation ! @@ -155,7 +130,6 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Linearized or graphical solution? - call wall_time(tt0) eGWlin(:) = eHF(:) + Z(:)*SigC(:) if(linearize) then @@ -173,8 +147,6 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA call GW_QP_graph(eta,nBas,nC,nO,nV,nR,nS,eHF,Om,rho,eGWlin,eHF,eGW,Z) end if - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for QP =',tt1-tt0,' seconds' ! Plot self-energy, renormalization factor, and spectral function @@ -188,33 +160,20 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA ! Compute the RPA correlation energy - call wall_time(tt0) call phLR_A(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,eGW,ERI,Aph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_A =',tt1-tt0,' seconds' - call wall_time(tt0) if(.not.TDA_W) call phLR_B(ispin,dRPA,nBas,nC,nO,nV,nR,nS,1d0,ERI,Bph) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR_B =',tt1-tt0,' seconds' - call wall_time(tt0) call phLR(TDA_W,nS,Aph,Bph,EcRPA,Om,XpY,XmY) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phLR =',tt1-tt0,' seconds' !--------------! ! Dump results ! !--------------! - call wall_time(tt0) call print_RG0W0(nBas,nO,eHF,ENuc,ERHF,SigC,Z,eGW,EcRPA,EcGM) - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for print_RG0W0 =',tt1-tt0,' seconds' ! Perform BSE calculation - call wall_time(tt0) if(dophBSE) then call GW_phBSE(dophBSE2,TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,ERI,dipole_int,eHF,eGW,EcBSE) @@ -265,10 +224,7 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA end if end if - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for phBSE =',tt1-tt0,' seconds' - call wall_time(tt0) if(doppBSE) then call GW_ppBSE(TDA_W,TDA,dBSE,dTDA,singlet,triplet,eta,nBas,nC,nO,nV,nR,nS,ERI,dipole_int,eHF,eGW,EcBSE) @@ -285,8 +241,6 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA write(*,*) end if - call wall_time(tt1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for ppBSE =',tt1-tt0,' seconds' ! end if @@ -300,7 +254,4 @@ subroutine RG0W0(dotest,doACFDT,exchange_kernel,doXBS,dophBSE,dophBSE2,TDA_W,TDA end if - call wall_time(t1) - write(*,'(A65,1X,F9.3,A8)') 'Wall time for RG0W0 =',t1-t0,' seconds' - end subroutine From 368cd7adf9a58395c7d0d57e47001f5dfae99f22 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Thu, 22 Aug 2024 18:35:17 +0200 Subject: [PATCH 15/15] OpenMP for tmp_m in GW_ppBSE_static_kernel_C --- src/GW/GW_ppBSE_static_kernel_C.f90 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/GW/GW_ppBSE_static_kernel_C.f90 b/src/GW/GW_ppBSE_static_kernel_C.f90 index 2df0025..eb1e912 100644 --- a/src/GW/GW_ppBSE_static_kernel_C.f90 +++ b/src/GW/GW_ppBSE_static_kernel_C.f90 @@ -51,6 +51,10 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI allocate(tmp_m(nBas,nBas,nS)) allocate(tmp(nBas,nBas,nBas,nBas)) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(m, c, a, eps) & + !$OMP SHARED(nS, nBas, eta2, Om, rho, tmp_m) + !$OMP DO do m = 1, nS eps = Om(m) / (Om(m)*Om(m) + eta2) do c = 1, nBas @@ -59,6 +63,8 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, & @@ -216,6 +222,10 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI allocate(tmp_m(nBas,nBas,nS)) allocate(tmp(nBas,nBas,nBas,nBas)) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(m, c, a, eps) & + !$OMP SHARED(nS, nBas, eta2, Om, rho, tmp_m) + !$OMP DO do m = 1, nS eps = Om(m) / (Om(m)*Om(m) + eta2) do c = 1, nBas @@ -224,6 +234,8 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI enddo enddo enddo + !$OMP END DO + !$OMP END PARALLEL call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, &