4
1
mirror of https://github.com/pfloos/quack synced 2024-08-28 23:32:04 +02:00

rho1 & rho2: use DGEMM instead of OpenMP

This commit is contained in:
Abdallah Ammar 2024-08-19 19:22:37 +02:00
parent 50bfb261ca
commit 635e7ae457

View File

@ -34,6 +34,10 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
double precision,intent(out) :: rho1(nBas,nBas,nVV)
double precision,intent(out) :: rho2(nBas,nBas,nOO)
integer :: dim_1, dim_2
double precision, allocatable :: ERI_1(:,:,:)
double precision, allocatable :: ERI_2(:,:,:)
! Initialization
rho1(:,:,:) = 0d0
@ -209,81 +213,135 @@ subroutine GTpp_excitation_density(ispin,nBas,nC,nO,nV,nR,nOO,nVV,ERI,X1,Y1,rho1
! alpha-beta block
!----------------------------------------------
! TODO
! debug for nC & nR
if(ispin == 3) then
print*, "ispin = ", ispin
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) &
!$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2)
dim_1 = (nBas - nO) * (nBas - nO)
dim_2 = nO * nO
allocate(ERI_1(nBas,nBas,dim_1), ERI_2(nBas,nBas,dim_2))
ERI_1 = 0.d0
ERI_2 = 0.d0
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP PRIVATE(p, q, c, d, cd, k, l, kl) &
!$OMP SHARED(nC, nBas, nR, nO, ERI_1, ERI_2, ERI)
!$OMP DO COLLAPSE(2)
do q = nC+1, nBas-nR
do p = nC+1, nBas-nR
ab = 0
do a = nO+1, nBas-nR
do b = nO+1, nBas-nR
ab = ab + 1
cd = 0
do c = nO+1, nBas-nR
do d = nO+1, nBas-nR
cd = cd + 1
rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab)
end do
end do
kl = 0
do k = nC+1, nO
do l = nC+1, nO
kl = kl + 1
rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab)
end do
end do
cd = 0
do c = nO+1, nBas-nR
do d = nO+1, nBas-nR
cd = cd + 1
ERI_1(p,q,cd) = ERI(p,q,c,d)
enddo
enddo
kl = 0
do k = nC+1, nO
do l = nC+1, nO
kl = kl + 1
ERI_2(p,q,kl) = ERI(p,q,k,l)
end do
end do
ij = 0
do i = nC+1, nO
do j = nC+1, nO
ij = ij + 1
cd = 0
do c = nO+1, nBas-nR
do d = nO+1, nBas-nR
cd = cd + 1
rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij)
end do
end do
kl = 0
do k = nC+1, nO
do l = nC+1, nO
kl = kl + 1
rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij)
end do
end do
end do
end do
end do
end do
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", nBas*nBas, dim_1, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X1(1,1), dim_1, &
0.d0, rho1(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_1, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y1(1,1), dim_2, &
1.d0, rho1(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_2, dim_1, 1.d0, &
ERI_1(1,1,1), nBas*nBas, X2(1,1), dim_1, &
0.d0, rho2(1,1,1), nBas*nBas)
call dgemm("N", "N", nBas*nBas, dim_2, dim_2, 1.d0, &
ERI_2(1,1,1), nBas*nBas, Y2(1,1), dim_2, &
1.d0, rho2(1,1,1), nBas*nBas)
deallocate(ERI_1, ERI_2)
! !$OMP PARALLEL DEFAULT(NONE) &
! !$OMP PRIVATE(p, q, a, b, ab, c, d, cd, i, j, ij, k, l, kl) &
! !$OMP SHARED(nC, nBas, nR, nO, rho1, rho2, ERI, X1, Y1, X2, Y2)
! !$OMP DO COLLAPSE(2)
!
! do q = nC+1, nBas-nR
! do p = nC+1, nBas-nR
!
! ab = 0
! do a = nO+1, nBas-nR
! do b = nO+1, nBas-nR
!
! ab = ab + 1
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = nO+1, nBas-nR
!
! cd = cd + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,c,d)*X1(cd,ab)
! end do
! end do
!
! kl = 0
! do k = nC+1, nO
! do l = nC+1, nO
!
! kl = kl + 1
!
! rho1(p,q,ab) = rho1(p,q,ab) + ERI(p,q,k,l)*Y1(kl,ab)
! end do
! end do
!
! end do
! end do
!
! ij = 0
! do i = nC+1, nO
! do j = nC+1, nO
!
! ij = ij + 1
!
! cd = 0
! do c = nO+1, nBas-nR
! do d = nO+1, nBas-nR
!
! cd = cd + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,c,d)*X2(cd,ij)
! end do
! end do
!
! kl = 0
! do k = nC+1, nO
! do l = nC+1, nO
!
! kl = kl + 1
!
! rho2(p,q,ij) = rho2(p,q,ij) + ERI(p,q,k,l)*Y2(kl,ij)
! end do
! end do
!
! end do
! end do
!
! end do
! end do
! !$OMP END DO
! !$OMP END PARALLEL
end if
end subroutine