10
1
mirror of https://github.com/pfloos/quack synced 2025-01-08 20:33:19 +01:00

optimized ppBSE kernels for Davidson: ispin = 2 + ERI access improv

This commit is contained in:
Abdallah Ammar 2024-09-11 01:36:42 +02:00
parent 0deffa8638
commit ee2fa09dda
2 changed files with 294 additions and 101 deletions

View File

@ -293,6 +293,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
allocate(Om_tmp(nS)) allocate(Om_tmp(nS))
a0 = nOrb - nR - nO a0 = nOrb - nR - nO
eta2 = eta * eta eta2 = eta * eta
do m = 1, nS do m = 1, nS
Om_tmp(m) = Om(m) / (Om(m) * Om(m) + eta2) Om_tmp(m) = Om(m) / (Om(m) * Om(m) + eta2)
@ -305,8 +306,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
!$OMP PRIVATE(a, b, aa, ab, c, d, cd, i, j, ij, m, state, & !$OMP PRIVATE(a, b, aa, ab, c, d, cd, i, j, ij, m, state, &
!$OMP tmp_e, tmp_ab, chi, mat_tmp) & !$OMP tmp_e, tmp_ab, chi, mat_tmp) &
!$OMP SHARED(nC, nO, nOrb, nR, nS, n_states_diag, nVV, & !$OMP SHARED(nC, nO, nOrb, nR, nS, n_states_diag, nVV, &
!$OMP nOO, a0, eF, lambda, e, Om_tmp, rho, ERI, U, & !$OMP nOO, a0, eF, lambda, e, Om_tmp, rho, ERI, H_mat)
!$OMP H_mat)
!$OMP DO SCHEDULE(GUIDED) !$OMP DO SCHEDULE(GUIDED)
do a = nO+1, nOrb-nR do a = nO+1, nOrb-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO
@ -335,7 +335,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(c,d,a,b) + ERI(d,c,a,b)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(d,c,b,a) + ERI(d,c,a,b))
if((a .eq. c) .and. (b .eq. d)) then if((a .eq. c) .and. (b .eq. d)) then
mat_tmp = mat_tmp + tmp_e mat_tmp = mat_tmp + tmp_e
endif endif
@ -359,7 +359,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(i,j,a,b) + ERI(j,i,a,b)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(j,i,b,a) + ERI(j,i,a,b))
H_mat(ab,ij) = -mat_tmp H_mat(ab,ij) = -mat_tmp
enddo ! j enddo ! j
@ -416,7 +416,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(a,b,i,j) + ERI(a,b,j,i)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(b,a,j,i) + ERI(b,a,i,j))
H_mat(ij,ab) = mat_tmp H_mat(ij,ab) = mat_tmp
enddo ! b enddo ! b
@ -437,7 +437,7 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(i,j,k,l) + ERI(i,j,l,k)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(l,k,j,i) + ERI(l,k,i,j))
if((i .eq. k) .and. (j .eq. l)) then if((i .eq. k) .and. (j .eq. l)) then
mat_tmp = mat_tmp - tmp_e mat_tmp = mat_tmp - tmp_e
@ -458,6 +458,154 @@ subroutine ppLR_GW_HR_calc_oneshot(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
deallocate(Om_tmp) deallocate(Om_tmp)
! ---
elseif(ispin .eq. 2) then
! ---
allocate(Om_tmp(nS))
a0 = nOrb - nR - nO - 1
eta2 = eta * eta
do m = 1, nS
Om_tmp(m) = Om(m) / (Om(m) * Om(m) + eta2)
enddo
allocate(H_mat(nVV,nOO+nVV))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(a, b, aa, ab, c, d, cd, i, j, ij, m, state, &
!$OMP tmp_e, tmp_ab, chi, mat_tmp) &
!$OMP SHARED(nC, nO, nOrb, nR, nS, n_states_diag, nVV, &
!$OMP nOO, a0, eF, lambda, e, Om_tmp, rho, ERI, H_mat)
!$OMP DO SCHEDULE(GUIDED)
do a = nO+1, nOrb-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1
do b = a+1, nOrb-nR
ab = aa + b
tmp_e = e(a) + e(b) - eF
cd = 0
do c = nO+1, nOrb-nR
do d = c+1, nOrb-nR
cd = cd + 1
mat_tmp = lambda * (ERI(d,c,b,a) - ERI(d,c,a,b))
chi = 0.d0
do m = 1, nS
chi = chi - Om_tmp(m) * (rho(m,a,c) * rho(m,b,d) - rho(m,a,d) * rho(m,b,c))
enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi
if((a .eq. c) .and. (b .eq. d)) then
mat_tmp = mat_tmp + tmp_e
endif
H_mat(ab,cd) = mat_tmp
enddo ! d
enddo ! c
ij = nVV
do i = nC+1, nO
do j = i+1, nO
ij = ij + 1
mat_tmp = lambda * (ERI(j,i,b,a) - ERI(j,i,a,b))
chi = 0.d0
do m = 1, nS
chi = chi - Om_tmp(m) * (rho(m,i,a) * rho(m,j,b) - rho(m,i,b) * rho(m,a,j))
end do
mat_tmp = mat_tmp + 4.d0 * lambda * chi
H_mat(ab,ij) = -mat_tmp
enddo ! j
enddo ! i
enddo ! b
enddo ! a
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", nVV, n_states_diag, nOO+nVV, 1.d0, &
H_mat(1,1), size(H_mat, 1), U(1,1), size(U, 1), &
0.d0, W(1,1), size(W, 1))
deallocate(H_mat)
! ---
i0 = nO - nC - 1
allocate(H_mat(nOO,nOO+nVV))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ii, ij, a, b, ab, k, l, kl, m, state, &
!$OMP tmp_e, tmp_ij, chi, mat_tmp) &
!$OMP SHARED(nC, nO, nOrb, nR, nS, nVV, i0, &
!$OMP eF, lambda, e, Om_tmp, rho, ERI, U, H_mat)
!$OMP DO SCHEDULE(GUIDED)
do i = nC+1, nO
ii = i0 * (i - nC - 1) - (i - nC - 1) * (i - nC) / 2 - nC - 1
do j = i+1, nO
ij = ii + j
tmp_e = e(i) + e(j) - eF
ab = 0
do a = nO+1, nOrb-nR
do b = a+1, nOrb-nR
ab = ab + 1
mat_tmp = lambda * (ERI(b,a,j,i) - ERI(b,a,i,j))
chi = 0.d0
do m = 1, nS
chi = chi - Om_tmp(m) * (rho(m,i,a) * rho(m,j,b) - rho(m,i,b) * rho(m,a,j))
enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi
H_mat(ij,ab) = mat_tmp
enddo ! b
enddo ! a
kl = nVV
do k = nC+1, nO
do l = k+1, nO
kl = kl + 1
mat_tmp = lambda * (ERI(l,k,j,i) - ERI(l,k,i,j))
chi = 0.d0
do m = 1, nS
chi = chi - Om_tmp(m) * (rho(m,i,k) * rho(m,j,l) - rho(m,i,l) * rho(m,j,k))
enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi
if((i .eq. k) .and. (j .eq. l)) then
mat_tmp = mat_tmp - tmp_e
endif
H_mat(ij,kl) = -mat_tmp
enddo ! l
enddo ! k
enddo ! j
enddo ! i
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", nOO, n_states_diag, nOO+nVV, 1.d0, &
H_mat(1,1), size(H_mat, 1), U(1,1), size(U, 1), &
0.d0, W(nVV+1,1), size(W, 1))
deallocate(H_mat)
deallocate(Om_tmp)
else else
print*, ' Error in ppLR_GW_HR_calc_oneshot' print*, ' Error in ppLR_GW_HR_calc_oneshot'
@ -555,7 +703,7 @@ subroutine ppLR_GW_HR_calc_batches(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(c,d,a,b) + ERI(d,c,a,b)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(d,c,b,a) + ERI(d,c,a,b))
if((a .eq. c) .and. (b .eq. d)) then if((a .eq. c) .and. (b .eq. d)) then
mat_tmp = mat_tmp + tmp_e mat_tmp = mat_tmp + tmp_e
endif endif
@ -579,7 +727,7 @@ subroutine ppLR_GW_HR_calc_batches(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(i,j,a,b) + ERI(j,i,a,b)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(j,i,b,a) + ERI(j,i,a,b))
H_mat(ij,bb) = -mat_tmp H_mat(ij,bb) = -mat_tmp
enddo ! j enddo ! j
@ -636,7 +784,7 @@ subroutine ppLR_GW_HR_calc_batches(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(a,b,i,j) + ERI(a,b,j,i)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(b,a,j,i) + ERI(b,a,i,j))
H_mat(ij,ab) = mat_tmp H_mat(ij,ab) = mat_tmp
enddo ! b enddo ! b
@ -657,7 +805,7 @@ subroutine ppLR_GW_HR_calc_batches(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
mat_tmp = 0.7071067811865475d0 * mat_tmp mat_tmp = 0.7071067811865475d0 * mat_tmp
endif endif
mat_tmp = mat_tmp * (4.d0 * chi + ERI(i,j,k,l) + ERI(i,j,l,k)) mat_tmp = mat_tmp * (4.d0 * chi + ERI(l,k,j,i) + ERI(l,k,i,j))
if((i .eq. k) .and. (j .eq. l)) then if((i .eq. k) .and. (j .eq. l)) then
mat_tmp = mat_tmp - tmp_e mat_tmp = mat_tmp - tmp_e
@ -678,117 +826,161 @@ subroutine ppLR_GW_HR_calc_batches(ispin, nOrb, nC, nO, nR, nOO, nVV, nS, lambda
deallocate(Om_tmp) deallocate(Om_tmp)
! ---
elseif(ispin .eq. 2) then elseif(ispin .eq. 2) then
call omp_set_max_active_levels(1)
allocate(Om_tmp(nS))
a0 = nOrb - nR - nO - 1
eta2 = eta * eta eta2 = eta * eta
do m = 1, nS
Om_tmp(m) = Om(m) / (Om(m) * Om(m) + eta2)
enddo
ab = 0 ! ---
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(a, b, aa, ab, c, d, cd, i, j, ij, m, state, &
!$OMP bb, tmp_e, tmp_ab, chi, mat_tmp, H_mat) &
!$OMP SHARED(nC, nO, nOrb, nR, nS, n_states_diag, nVV, &
!$OMP nOO, a0, eF, lambda, e, Om_tmp, rho, ERI, U, W)
allocate(H_mat(nOO+nVV,a0))
!$OMP DO SCHEDULE(GUIDED)
do a = nO+1, nOrb-nR do a = nO+1, nOrb-nR
aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1
do b = a+1, nOrb-nR do b = a+1, nOrb-nR
ab = ab + 1 ab = aa + b
do state = 1, n_states_diag bb = b - a
W(ab,state) = 0.d0 tmp_e = e(a) + e(b) - eF
cd = 0 cd = 0
do c = nO+1, nOrb-nR do c = nO+1, nOrb-nR
do d = c+1, nOrb-nR do d = c+1, nOrb-nR
cd = cd + 1 cd = cd + 1
mat_tmp = (e(a) + e(b) - eF) * Kronecker_delta(a, c) * Kronecker_delta(b, d) & mat_tmp = lambda * (ERI(d,c,b,a) - ERI(d,c,a,b))
+ lambda * (ERI(a,b,c,d) - ERI(a,b,d,c))
chi = 0.d0 chi = 0.d0
do m = 1, nS do m = 1, nS
eps = Om(m)**2 + eta2 chi = chi - Om_tmp(m) * (rho(m,a,c) * rho(m,b,d) - rho(m,a,d) * rho(m,b,c))
chi = chi - rho(m,a,c) * rho(m,b,d) * Om(m) / eps &
+ rho(m,a,d) * rho(m,b,c) * Om(m) / eps
enddo enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi mat_tmp = mat_tmp + 4.d0 * lambda * chi
W(ab,state) = W(ab,state) + mat_tmp * U(cd,state) if((a .eq. c) .and. (b .eq. d)) then
enddo mat_tmp = mat_tmp + tmp_e
enddo endif
H_mat(cd,bb) = mat_tmp
enddo ! d
enddo ! c
ij = nVV ij = nVV
do i = nC+1, nO do i = nC+1, nO
do j = i+1, nO do j = i+1, nO
ij = ij + 1 ij = ij + 1
mat_tmp = lambda * (ERI(a,b,i,j) - ERI(a,b,j,i)) mat_tmp = lambda * (ERI(j,i,b,a) - ERI(j,i,a,b))
chi = 0.d0 chi = 0.d0
do m = 1, nS do m = 1, nS
eps = Om(m)**2 + eta2 chi = chi - Om_tmp(m) * (rho(m,i,a) * rho(m,j,b) - rho(m,i,b) * rho(m,a,j))
chi = chi - rho(m,i,a) * rho(m,j,b) * Om(m) / eps &
+ rho(m,i,b) * rho(m,a,j) * Om(m) / eps
end do end do
mat_tmp = mat_tmp + 4.d0 * lambda * chi mat_tmp = mat_tmp + 4.d0 * lambda * chi
W(ab,state) = W(ab,state) - mat_tmp * U(ij,state) H_mat(ij,bb) = -mat_tmp
enddo enddo ! j
enddo enddo ! i
enddo ! state
enddo ! b enddo ! b
call dgemm("T", "N", nOrb-nR-a, n_states_diag, nOO+nVV, 1.d0, &
H_mat(1,1), size(H_mat, 1), U(1,1), size(U, 1), &
0.d0, W(aa+a+1,1), size(W, 1))
enddo ! a enddo ! a
!$OMP END DO
deallocate(H_mat)
!$OMP END PARALLEL
! --- ! ---
ij = nVV i0 = nO - nC - 1
allocate(H_mat(nOO,nOO+nVV))
!$OMP PARALLEL &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i, j, ii, ij, a, b, ab, k, l, kl, m, state, &
!$OMP tmp_e, tmp_ij, chi, mat_tmp) &
!$OMP SHARED(nC, nO, nOrb, nR, nS, nVV, i0, &
!$OMP eF, lambda, e, Om_tmp, rho, ERI, U, H_mat)
!$OMP DO SCHEDULE(GUIDED)
do i = nC+1, nO do i = nC+1, nO
ii = i0 * (i - nC - 1) - (i - nC - 1) * (i - nC) / 2 - nC - 1
do j = i+1, nO do j = i+1, nO
ij = ij + 1 ij = ii + j
do state = 1, n_states_diag tmp_e = e(i) + e(j) - eF
W(ij,state) = 0.d0
ab = 0 ab = 0
do a = nO+1, nOrb-nR do a = nO+1, nOrb-nR
do b = a+1, nOrb-nR do b = a+1, nOrb-nR
ab = ab + 1 ab = ab + 1
mat_tmp = lambda * (ERI(a,b,i,j) - ERI(a,b,j,i)) mat_tmp = lambda * (ERI(b,a,j,i) - ERI(b,a,i,j))
chi = 0.d0 chi = 0.d0
do m = 1, nS do m = 1, nS
eps = Om(m)**2 + eta2 chi = chi - Om_tmp(m) * (rho(m,i,a) * rho(m,j,b) - rho(m,i,b) * rho(m,a,j))
chi = chi - rho(m,i,a) * rho(m,j,b) * Om(m) / eps &
+ rho(m,i,b) * rho(m,a,j) * Om(m) / eps
enddo enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi mat_tmp = mat_tmp + 4.d0 * lambda * chi
W(ij,state) = W(ij,state) + mat_tmp * U(ab,state) H_mat(ij,ab) = mat_tmp
enddo enddo ! b
enddo enddo ! a
kl = nVV kl = nVV
do k = nC+1, nO do k = nC+1, nO
do l = k+1, nO do l = k+1, nO
kl = kl + 1 kl = kl + 1
mat_tmp = - (e(i) + e(j) - eF) * Kronecker_delta(i, k) * Kronecker_delta(j, l) & mat_tmp = lambda * (ERI(l,k,j,i) - ERI(l,k,i,j))
+ lambda * (ERI(i,j,k,l) - ERI(i,j,l,k))
chi = 0.d0 chi = 0.d0
do m = 1, nS do m = 1, nS
eps = Om(m)**2 + eta2 chi = chi - Om_tmp(m) * (rho(m,i,k) * rho(m,j,l) - rho(m,i,l) * rho(m,j,k))
chi = chi - rho(m,i,k) * rho(m,j,l) * Om(m) / eps &
+ rho(m,i,l) * rho(m,j,k) * Om(m) / eps
enddo enddo
mat_tmp = mat_tmp + 4.d0 * lambda * chi mat_tmp = mat_tmp + 4.d0 * lambda * chi
W(ij,state) = W(ij,state) - mat_tmp * U(kl,state) if((i .eq. k) .and. (j .eq. l)) then
enddo mat_tmp = mat_tmp - tmp_e
enddo endif
enddo ! state H_mat(ij,kl) = -mat_tmp
enddo ! l
enddo ! k
enddo ! j enddo ! j
enddo ! i enddo ! i
!$OMP END DO
!$OMP END PARALLEL
call dgemm("N", "N", nOO, n_states_diag, nOO+nVV, 1.d0, &
H_mat(1,1), size(H_mat, 1), U(1,1), size(U, 1), &
0.d0, W(nVV+1,1), size(W, 1))
deallocate(H_mat)
deallocate(Om_tmp)
! ---
else else

View File

@ -158,6 +158,7 @@ subroutine ppLR_davidson(ispin, TDA, nC, nO, nR, nOrb, nOO, nVV, lambda, e, eF,
! print*, ab, H_diag(ab) ! print*, ab, H_diag(ab)
!enddo !enddo
! TODO: improve guess
! initialize guess ! initialize guess
R = 0.d0 R = 0.d0
do k = 1, n_states do k = 1, n_states