From 5a738f00b94d3e6aa53fe7c7eeb606093ae79665 Mon Sep 17 00:00:00 2001 From: Abdallah Ammar Date: Wed, 21 Aug 2024 09:03:40 +0200 Subject: [PATCH] OpenMP -> DGEMM for ispin=2,4 in GTpp_excitation_density --- src/GT/GTpp_excitation_density.f90 | 189 ++++++++++++++++++----------- 1 file changed, 121 insertions(+), 68 deletions(-) diff --git a/src/GT/GTpp_excitation_density.f90 b/src/GT/GTpp_excitation_density.f90 index fe86ef3..05f9c2d 100644 --- a/src/GT/GTpp_excitation_density.f90 +++ b/src/GT/GTpp_excitation_density.f90 @@ -2,6 +2,11 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! Compute excitation densities for T-matrix self-energy + ! TODO + ! debug DGEMM for nC != 0 + ! and nR != 0 + + implicit none @@ -127,81 +132,132 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 if(ispin == 2 .or. ispin == 4) then - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & - !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) + dim_1 = (nBas - nO) * (nBas - nO - 1) / 2 + dim_2 = nO * (nO - 1) / 2 + + allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2)) + ERI_1 = 0.d0 + ERI_2 = 0.d0 + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(p, q, c, d, cd, k, l, kl) & + !$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI) !$OMP DO COLLAPSE(2) do q = nC+1, nBas-nR do p = nC+1, nBas-nR - - ab = 0 + cd = 0 + do c = nO+1, nBas-nR + do d = c+1, nBas-nR + cd = cd + 1 + ERI_1(p,q,cd) = ERI(p,q,c,d) - ERI(p,q,d,c) + enddo + enddo + kl = 0 + do k = nC+1, nO + do l = k+1, nO + kl = kl + 1 + ERI_2(p,q,kl) = ERI(p,q,k,l) - ERI(p,q,l,k) + end do + end do + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL - do a = nO+1, nBas-nR - do b = a+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, & + 0.d0, rho1(1,1,1), nBas*nBas) - ab = ab + 1 - - cd = 0 - do c = nO+1, nBas-nR - do d = c+1, nBas-nR + call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, & + 1.d0, rho1(1,1,1), nBas*nBas) - cd = cd + 1 + call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, & + ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, & + 0.d0, rho2(1,1,1), nBas*nBas) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO + call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, & + ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, & + 1.d0, rho2(1,1,1), nBas*nBas) - kl = kl + 1 + deallocate(ERI_1, ERI_2) - rho1(p,q,ab) = rho1(p,q,ab) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) - end do ! l - end do ! k - - end do ! b - end do ! a - - ij = 0 - do i = nC+1, nO - do j = i+1, nO - ij = ij + 1 - - cd = 0 - - do c = nO+1, nBas-nR - do d = c+1, nBas-nR - - cd = cd + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) - end do ! d - end do ! c - - kl = 0 - do k = nC+1, nO - do l = k+1, nO - - kl = kl + 1 - - rho2(p,q,ij) = rho2(p,q,ij) & - + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) - end do ! l - end do ! k - - end do ! j - end do ! i - - end do ! p - end do ! q - !$OMP END DO - !$OMP END PARALLEL +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) & +! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2) +! !$OMP DO COLLAPSE(2) +! do q = nC+1, nBas-nR +! do p = nC+1, nBas-nR +! +! ab = 0 +! +! do a = nO+1, nBas-nR +! do b = a+1, nBas-nR +! +! ab = ab + 1 +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X1(cd,ab) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho1(p,q,ab) = rho1(p,q,ab) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y1(kl,ab) +! end do ! l +! end do ! k +! +! end do ! b +! end do ! a +! +! ij = 0 +! do i = nC+1, nO +! do j = i+1, nO +! +! ij = ij + 1 +! +! cd = 0 +! +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! +! cd = cd + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,c,d) - ERI(p,q,d,c))*X2(cd,ij) +! end do ! d +! end do ! c +! +! kl = 0 +! do k = nC+1, nO +! do l = k+1, nO +! +! kl = kl + 1 +! +! rho2(p,q,ij) = rho2(p,q,ij) & +! + (ERI(p,q,k,l) - ERI(p,q,l,k))*Y2(kl,ij) +! end do ! l +! end do ! k +! +! end do ! j +! end do ! i +! +! end do ! p +! end do ! q +! !$OMP END DO +! !$OMP END PARALLEL end if @@ -209,9 +265,6 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1 ! alpha-beta block !---------------------------------------------- - ! TODO - ! debug for nC & nR - if(ispin == 3) then dim_1 = (nBas - nO) * (nBas - nO)