10
0
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-05 10:59:45 +01:00

Vectorizing integrals

This commit is contained in:
Anthony Scemama 2022-11-20 17:54:04 +01:00
parent c1bdfe7e93
commit 76eeade4d5
5 changed files with 126 additions and 75 deletions

View File

@ -160,7 +160,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points)
implicit none implicit none
integer, intent(in) :: i, j, n_points integer, intent(in) :: i, j, n_points
double precision, intent(in) :: D_center(3,n_points), delta double precision, intent(in) :: D_center(n_points,3), delta
double precision, intent(out) :: resv(n_points) double precision, intent(out) :: resv(n_points)
integer :: power_A(3), power_B(3), l, k integer :: power_A(3), power_B(3), l, k
@ -284,7 +284,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j,
implicit none implicit none
integer, intent(in) :: i, j, n_points integer, intent(in) :: i, j, n_points
double precision, intent(in) :: B_center(3), beta, D_center(3,n_points), delta double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta
double precision, intent(out) :: resv(n_points) double precision, intent(out) :: resv(n_points)
integer :: power_A1(3), power_A2(3), l, k integer :: power_A1(3), power_A2(3), l, k
@ -321,19 +321,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j,
A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
allocate (fact_g(n_points), G_center(3,n_points), analytical_j(n_points) ) allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
bg = beta * gama_inv bg = beta * gama_inv
dg = delta * gama_inv dg = delta * gama_inv
bdg = bg * delta bdg = bg * delta
do ipoint=1,n_points do ipoint=1,n_points
G_center(1,ipoint) = bg * B_center(1) + dg * D_center(1,ipoint) G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
G_center(2,ipoint) = bg * B_center(2) + dg * D_center(2,ipoint) G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
G_center(3,ipoint) = bg * B_center(3) + dg * D_center(3,ipoint) G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
fact_g(ipoint) = bdg * ( & fact_g(ipoint) = bdg * ( &
(B_center(1) - D_center(1,ipoint)) * (B_center(1) - D_center(1,ipoint)) & (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+ (B_center(2) - D_center(2,ipoint)) * (B_center(2) - D_center(2,ipoint)) & + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+ (B_center(3) - D_center(3,ipoint)) * (B_center(3) - D_center(3,ipoint)) ) + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
if(fact_g(ipoint) < 10d0) then if(fact_g(ipoint) < 10d0) then
fact_g(ipoint) = dexp(-fact_g(ipoint)) fact_g(ipoint) = dexp(-fact_g(ipoint))

View File

@ -19,7 +19,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
double precision, allocatable :: int_fit_v(:) double precision, allocatable :: int_fit_v(:)
double precision, external :: overlap_gauss_r12_ao_with1s double precision, external :: overlap_gauss_r12_ao_with1s
provide mu_erf final_grid_points j1b_pen provide mu_erf final_grid_points_transp j1b_pen
call wall_time(wall0) call wall_time(wall0)
allocate(int_fit_v(n_points_final_grid)) allocate(int_fit_v(n_points_final_grid))
@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
!$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
!$OMP coef_fit, expo_fit, int_fit_v, tmp) & !$OMP coef_fit, expo_fit, int_fit_v, tmp) &
!$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,&
!$OMP final_grid_points, n_max_fit_slat, & !$OMP final_grid_points_transp, n_max_fit_slat, &
!$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, &
!$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, &
!$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,&
@ -55,7 +55,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
expo_fit = expo_gauss_1_erf_x_2(i_fit) expo_fit = expo_gauss_1_erf_x_2(i_fit)
coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef
call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points, expo_fit, i, j, int_fit_v, n_points_final_grid) call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, expo_fit, i, j, int_fit_v, n_points_final_grid)
do ipoint = 1, n_points_final_grid do ipoint = 1, n_points_final_grid
int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)

View File

@ -56,7 +56,7 @@ end
!--- !---
subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points)
BEGIN_DOC BEGIN_DOC
! Computes the following integral : ! Computes the following integral :
! !
@ -70,59 +70,66 @@ subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,
implicit none implicit none
include 'constants.include.F' include 'constants.include.F'
integer, intent(in) :: n_points integer, intent(in) :: n_points
double precision, intent(in) :: D_center(3,n_points), delta ! pure gaussian "D" double precision, intent(in) :: D_center_(n_points,3), delta ! pure gaussian "D"
double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
integer, intent(in) :: power_A(3),power_B(3) integer, intent(in) :: power_A(3),power_B(3)
double precision, intent(out) :: rvec(n_points) double precision, intent(out) :: rvec(n_points)
double precision :: overlap_x,overlap_y,overlap_z,overlap double precision, allocatable :: overlap(:)
double precision :: overlap_x, overlap_y, overlap_z
integer :: maxab integer :: maxab
integer, allocatable :: iorder_a_new(:) integer, allocatable :: iorder_a_new(:)
double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) double precision, allocatable :: A_new(:,:,:), A_center_new(:,:)
double precision, allocatable :: fact_a_new(:) double precision, allocatable :: fact_a_new(:)
double precision :: alpha_new double precision :: alpha_new
double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr double precision :: accu,thr, coefxy
integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint
dim1=100 dim1=100
thr = 1.d-10 thr = 1.d-10
d(:) = 0 d(:) = 0
! maxab = maxval(d(1:3)) maxab = maxval(d(1:3))
maxab = max_dim
double precision, allocatable :: D_center(:,:)
allocate(D_center(3,n_points))
D_center(1,1:n_points) = D_center_(1:n_points,1)
D_center(2,1:n_points) = D_center_(1:n_points,2)
D_center(3,1:n_points) = D_center_(1:n_points,3)
allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), &
fact_a_new(n_points), iorder_a_new(3)) fact_a_new(n_points), iorder_a_new(3), overlap(n_points) )
call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, &
alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, &
D_center, A_center, n_points) D_center, A_center, n_points)
do ipoint=1,n_points do ipoint=1,n_points
rvec(ipoint) = 0.d0
enddo
! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
accu = 0.d0
do lx = 0, iorder_a_new(1) do lx = 0, iorder_a_new(1)
coefx = A_new(lx,1,ipoint)
if(dabs(coefx).lt.thr)cycle
iorder_tmp(1) = lx iorder_tmp(1) = lx
do ly = 0, iorder_a_new(2) do ly = 0, iorder_a_new(2)
coefy = A_new(ly,2,ipoint)
coefxy = coefx * coefy
if(dabs(coefxy).lt.thr)cycle
iorder_tmp(2) = ly iorder_tmp(2) = ly
do lz = 0, iorder_a_new(3) do lz = 0, iorder_a_new(3)
coefz = A_new(lz,3,ipoint)
coefxyz = coefxy * coefz
if(dabs(coefxyz).lt.thr)cycle
iorder_tmp(3) = lz iorder_tmp(3) = lz
call overlap_gaussian_xyz(A_center_new(1,ipoint),B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points)
accu += coefxyz * overlap do ipoint=1,n_points
rvec(ipoint) = rvec(ipoint) + A_new(lx,1,ipoint) * &
A_new(ly,2,ipoint) * &
A_new(lz,3,ipoint) * overlap(ipoint)
enddo enddo
enddo enddo
enddo enddo
rvec(ipoint) = fact_a_new(ipoint) * accu enddo
end do
do ipoint=1,n_points
rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint)
enddo
deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap)
end end
!--- !---

View File

@ -403,6 +403,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp)
end subroutine end subroutine
!-
subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points)
implicit none
BEGIN_DOC
! Gaussian product in 1D with multiple xa
! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
END_DOC
integer, intent(in) :: n_points
double precision , intent(in) :: a,b ! Exponents
double precision , intent(in) :: xa(n_points),xb ! Centers
double precision , intent(out) :: p(n_points) ! New exponent
double precision , intent(out) :: xp(n_points) ! New center
double precision , intent(out) :: k(n_points) ! Constant
double precision :: p_inv
integer :: ipoint
ASSERT (a>0.)
ASSERT (b>0.)
double precision :: xab, ab
p = a+b
p_inv = 1.d0/(a+b)
ab = a*b*p_inv
do ipoint = 1, n_points
xab = xa(ipoint)-xb
k(ipoint) = ab*xab*xab
if (k(ipoint) > 40.d0) then
k(ipoint)=0.d0
cycle
endif
k(ipoint) = exp(-k(ipoint))
xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv
enddo
end subroutine

View File

@ -154,7 +154,7 @@ end
! --- ! ---
subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,&
power_B,overlap_x,overlap_y,overlap_z,overlap,dim, n_points) power_B,overlap,dim, n_points)
implicit none implicit none
BEGIN_DOC BEGIN_DOC
!.. math:: !.. math::
@ -165,53 +165,57 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,&
END_DOC END_DOC
include 'constants.include.F' include 'constants.include.F'
integer,intent(in) :: dim, n_points integer,intent(in) :: dim, n_points
double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision,intent(in) :: A_center(3,n_points),B_center(3) ! center of the x1 functions
double precision, intent(in) :: alpha,beta double precision, intent(in) :: alpha,beta
integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions
double precision, intent(out) :: overlap_x(n_points),overlap_y(n_points),overlap_z(n_points),overlap(n_points) double precision, intent(out) :: overlap(n_points)
double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p
double precision :: F_integral_tab(0:max_dim) double precision :: F_integral_tab(0:max_dim)
integer :: iorder_p(3) double precision :: p, overlap_x, overlap_y, overlap_z
double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:)
call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) integer :: iorder_p(3), ipoint, ldp
if(fact_p.lt.1d-20)then
overlap_x = 1.d-10
overlap_y = 1.d-10
overlap_z = 1.d-10
overlap = 1.d-10
return
endif
integer :: nmax integer :: nmax
double precision :: F_integral double precision :: F_integral
ldp = max_dim
allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), &
fact_pp(n_points), pp(n_points))
call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points)
nmax = maxval(iorder_p) nmax = maxval(iorder_p)
do i = 0,nmax do i=0, nmax
F_integral_tab(i) = F_integral(i,p) F_integral_tab(i) = F_integral(i,p)
enddo enddo
overlap_x = P_new(0,1) * F_integral_tab(0)
overlap_y = P_new(0,2) * F_integral_tab(0)
overlap_z = P_new(0,3) * F_integral_tab(0)
integer :: i integer :: i
call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points)
do ipoint=1,n_points
if(fact_p(ipoint).lt.1d-20)then
overlap(ipoint) = 1.d-10
cycle
endif
overlap_x = P_new(0,1,ipoint) * F_integral_tab(0)
do i = 1,iorder_p(1) do i = 1,iorder_p(1)
overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) overlap_x = overlap_x + P_new(i,1,ipoint) * F_integral_tab(i)
enddo enddo
call gaussian_product_x(alpha,A_center(1),beta,B_center(1),fact_p,p,P_center(1))
overlap_x *= fact_p
overlap_y = P_new(0,2,ipoint) * F_integral_tab(0)
do i = 1,iorder_p(2) do i = 1,iorder_p(2)
overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) overlap_y = overlap_y + P_new(i,2,ipoint) * F_integral_tab(i)
enddo enddo
call gaussian_product_x(alpha,A_center(2),beta,B_center(2),fact_p,p,P_center(2))
overlap_y *= fact_p
overlap_z = P_new(0,3,ipoint) * F_integral_tab(0)
do i = 1,iorder_p(3) do i = 1,iorder_p(3)
overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) overlap_z = overlap_z + P_new(i,3,ipoint) * F_integral_tab(i)
enddo enddo
call gaussian_product_x(alpha,A_center(3),beta,B_center(3),fact_p,p,P_center(3))
overlap_z *= fact_p
overlap = overlap_x * overlap_y * overlap_z overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint)
enddo
deallocate(P_new, P_center, fact_p, pp, fact_pp)
end end
! --- ! ---