diff --git a/src/GW/GW_ppBSE_static_kernel_C.f90 b/src/GW/GW_ppBSE_static_kernel_C.f90 index 534654c..2df0025 100644 --- a/src/GW/GW_ppBSE_static_kernel_C.f90 +++ b/src/GW/GW_ppBSE_static_kernel_C.f90 @@ -31,6 +31,8 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI integer :: a0, aa double precision, allocatable :: Om_tmp(:) + double precision, allocatable :: tmp_m(:,:,:) + double precision, allocatable :: tmp(:,:,:,:) ! Output variables @@ -46,19 +48,27 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI lambda4 = 4.d0 * lambda eta2 = eta * eta - allocate(Om_tmp(nS)) + allocate(tmp_m(nBas,nBas,nS)) + allocate(tmp(nBas,nBas,nBas,nBas)) - !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) - !$OMP DO do m = 1, nS - Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + eps = Om(m) / (Om(m)*Om(m) + eta2) + do c = 1, nBas + do a = 1, nBas + tmp_m(a,c,m) = eps * rho(a,c,m) + enddo + enddo enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) & - !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & + tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, & + 0.d0, tmp(1,1,1,1), nBas*nBas) + + deallocate(tmp_m) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd, tmp_ab) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC) !$OMP DO do a = nO+1, nBas-nR aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO @@ -75,13 +85,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI do d = c, nBas-nR cd = cd + 1 - KC(ab,cd) = 0d0 - do m = 1, nS - KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & - - rho(a,d,m) * rho(b,c,m) * Om_tmp(m) - end do - - KC(ab,cd) = tmp_ab * KC(ab,cd) + KC(ab,cd) = -tmp_ab * (tmp(a,c,b,d) + tmp(a,d,b,c)) if(c .eq. d) then KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd) endif @@ -92,8 +96,87 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI !$OMP END DO !$OMP END PARALLEL - deallocate(Om_tmp) + deallocate(tmp) + +! do a=nO+1,nBas-nR +! do b=a,nBas-nR +! ab = ab + 1 +! cd = 0 +! do c=nO+1,nBas-nR +! do d=c,nBas-nR +! cd = cd + 1 +! +! chi = 0d0 +! do m=1,nS +! eps = Om(m)**2 + eta**2 +! chi = chi - rho(a,c,m)*rho(b,d,m)*Om(m)/eps & +! - rho(a,d,m)*rho(b,c,m)*Om(m)/eps +! end do + + +! --- --- --- +! OpenMP implementation +! --- --- --- +! +! a0 = nBas - nR - nO +! lambda4 = 4.d0 * lambda +! eta2 = eta * eta +! +! allocate(Om_tmp(nS)) +! +! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) +! !$OMP DO +! do m = 1, nS +! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m, tmp_ab) & +! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) +! !$OMP DO +! do a = nO+1, nBas-nR +! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO +! do b = a, nBas-nR +! ab = aa + b +! +! tmp_ab = lambda4 +! if(a .eq. b) then +! tmp_ab = 0.7071067811865475d0 * lambda4 +! endif +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c, nBas-nR +! cd = cd + 1 +! +! KC(ab,cd) = 0d0 +! do m = 1, nS +! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & +! - rho(a,d,m) * rho(b,c,m) * Om_tmp(m) +! end do +! +! KC(ab,cd) = tmp_ab * KC(ab,cd) +! if(c .eq. d) then +! KC(ab,cd) = 0.7071067811865475d0 * KC(ab,cd) +! endif +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! deallocate(Om_tmp) +! --- --- --- + + +! --- --- --- +! Naive implementation +! --- --- --- +! ! ab = 0 ! do a=nO+1,nBas-nR ! do b=a,nBas-nR @@ -116,6 +199,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI ! end do ! end do ! end do +! --- --- --- end if @@ -129,19 +213,27 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI lambda4 = 4.d0 * lambda eta2 = eta * eta - allocate(Om_tmp(nS)) + allocate(tmp_m(nBas,nBas,nS)) + allocate(tmp(nBas,nBas,nBas,nBas)) - !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) - !$OMP DO do m = 1, nS - Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) + eps = Om(m) / (Om(m)*Om(m) + eta2) + do c = 1, nBas + do a = 1, nBas + tmp_m(a,c,m) = eps * rho(a,c,m) + enddo + enddo enddo - !$OMP END DO - !$OMP END PARALLEL - !$OMP PARALLEL DEFAULT(NONE) & - !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) & - !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) + call dgemm("N", "T", nBas*nBas, nBas*nBas, nS, 1.d0, & + tmp_m(1,1,1), nBas*nBas, rho(1,1,1), nBas*nBas, & + 0.d0, tmp(1,1,1,1), nBas*nBas) + + deallocate(tmp_m) + + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP PRIVATE(a, b, aa, ab, c, d, cd) & + !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, tmp, KC) !$OMP DO do a = nO+1, nBas-nR aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1 @@ -153,13 +245,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI do d = c+1, nBas-nR cd = cd + 1 - KC(ab,cd) = 0d0 - do m = 1, nS - KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & - + rho(a,d,m) * rho(b,c,m) * Om_tmp(m) - end do - - KC(ab,cd) = lambda4 * KC(ab,cd) + KC(ab,cd) = lambda4 * (-tmp(a,c,b,d) + tmp(a,d,b,c)) enddo enddo enddo @@ -167,8 +253,63 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI !$OMP END DO !$OMP END PARALLEL - deallocate(Om_tmp) + deallocate(tmp) + +! --- --- --- +! OpenMP implementation +! --- --- --- +! +! a0 = nBas - nR - nO - 1 +! lambda4 = 4.d0 * lambda +! eta2 = eta * eta +! +! allocate(Om_tmp(nS)) +! +! !$OMP PARALLEL DEFAULT(NONE) PRIVATE(m) SHARED(nS, eta2, Om, Om_tmp) +! !$OMP DO +! do m = 1, nS +! Om_tmp(m) = Om(m) / (Om(m)*Om(m) + eta2) +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! !$OMP PARALLEL DEFAULT(NONE) & +! !$OMP PRIVATE(a, b, aa, ab, c, d, cd, m) & +! !$OMP SHARED(nO, nBas, nR, nS, a0, lambda4, Om_tmp, rho, KC) +! !$OMP DO +! do a = nO+1, nBas-nR +! aa = a0 * (a - nO - 1) - (a - nO - 1) * (a - nO) / 2 - nO - 1 +! do b = a+1, nBas-nR +! ab = aa + b +! +! cd = 0 +! do c = nO+1, nBas-nR +! do d = c+1, nBas-nR +! cd = cd + 1 +! +! KC(ab,cd) = 0d0 +! do m = 1, nS +! KC(ab,cd) = KC(ab,cd) - rho(a,c,m) * rho(b,d,m) * Om_tmp(m) & +! + rho(a,d,m) * rho(b,c,m) * Om_tmp(m) +! end do +! +! KC(ab,cd) = lambda4 * KC(ab,cd) +! enddo +! enddo +! enddo +! enddo +! !$OMP END DO +! !$OMP END PARALLEL +! +! deallocate(Om_tmp) +! --- --- --- + + +! --- --- --- +! Naive implementation +! --- --- --- +! ! ab = 0 ! do a=nO+1,nBas-nR ! do b=a+1,nBas-nR @@ -191,6 +332,7 @@ subroutine GW_ppBSE_static_kernel_C(ispin,eta,nBas,nC,nO,nV,nR,nS,nVV,lambda,ERI ! end do ! end do ! end do +! --- --- --- end if