From 76eeade4d5d5908ebbb971a2e3bdbd0d065848ee Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 17:54:04 +0100 Subject: [PATCH] Vectorizing integrals --- src/ao_many_one_e_ints/ao_gaus_gauss.irp.f | 18 ++--- src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 6 +- .../prim_int_gauss_gauss.irp.f | 61 ++++++++------- src/utils/integration.irp.f | 40 ++++++++++ src/utils/one_e_integration.irp.f | 76 ++++++++++--------- 5 files changed, 126 insertions(+), 75 deletions(-) diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index a657d6b1..56575b54 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -160,7 +160,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) implicit none integer, intent(in) :: i, j, n_points - double precision, intent(in) :: D_center(3,n_points), delta + double precision, intent(in) :: D_center(n_points,3), delta double precision, intent(out) :: resv(n_points) integer :: power_A(3), power_B(3), l, k @@ -284,7 +284,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, implicit none integer, intent(in) :: i, j, n_points - double precision, intent(in) :: B_center(3), beta, D_center(3,n_points), delta + double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta double precision, intent(out) :: resv(n_points) integer :: power_A1(3), power_A2(3), l, k @@ -321,19 +321,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, A1_center(1:3) = nucl_coord(ao_nucl(i),1:3) A2_center(1:3) = nucl_coord(ao_nucl(j),1:3) - allocate (fact_g(n_points), G_center(3,n_points), analytical_j(n_points) ) + allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) bg = beta * gama_inv dg = delta * gama_inv bdg = bg * delta do ipoint=1,n_points - G_center(1,ipoint) = bg * B_center(1) + dg * D_center(1,ipoint) - G_center(2,ipoint) = bg * B_center(2) + dg * D_center(2,ipoint) - G_center(3,ipoint) = bg * B_center(3) + dg * D_center(3,ipoint) + G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) + G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2) + G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3) fact_g(ipoint) = bdg * ( & - (B_center(1) - D_center(1,ipoint)) * (B_center(1) - D_center(1,ipoint)) & - + (B_center(2) - D_center(2,ipoint)) * (B_center(2) - D_center(2,ipoint)) & - + (B_center(3) - D_center(3,ipoint)) * (B_center(3) - D_center(3,ipoint)) ) + (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) & + + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) & + + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) ) if(fact_g(ipoint) < 10d0) then fact_g(ipoint) = dexp(-fact_g(ipoint)) diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 6875bc7a..272371c8 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -19,7 +19,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n double precision, allocatable :: int_fit_v(:) double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points j1b_pen + provide mu_erf final_grid_points_transp j1b_pen call wall_time(wall0) allocate(int_fit_v(n_points_final_grid)) @@ -29,7 +29,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& !$OMP coef_fit, expo_fit, int_fit_v, tmp) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP final_grid_points_transp, n_max_fit_slat, & !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& @@ -55,7 +55,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n expo_fit = expo_gauss_1_erf_x_2(i_fit) coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points, expo_fit, i, j, int_fit_v, n_points_final_grid) + call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, expo_fit, i, j, int_fit_v, n_points_final_grid) do ipoint = 1, n_points_final_grid int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 0da39561..ae62a86c 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,7 +56,7 @@ end !--- -subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) BEGIN_DOC ! Computes the following integral : ! @@ -70,59 +70,66 @@ subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B, implicit none include 'constants.include.F' integer, intent(in) :: n_points - double precision, intent(in) :: D_center(3,n_points), delta ! pure gaussian "D" + double precision, intent(in) :: D_center_(n_points,3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: rvec(n_points) - double precision :: overlap_x,overlap_y,overlap_z,overlap + double precision, allocatable :: overlap(:) + double precision :: overlap_x, overlap_y, overlap_z integer :: maxab integer, allocatable :: iorder_a_new(:) double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) double precision, allocatable :: fact_a_new(:) double precision :: alpha_new - double precision :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr + double precision :: accu,thr, coefxy integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint dim1=100 thr = 1.d-10 d(:) = 0 -! maxab = maxval(d(1:3)) - maxab = max_dim + maxab = maxval(d(1:3)) + + double precision, allocatable :: D_center(:,:) + allocate(D_center(3,n_points)) + D_center(1,1:n_points) = D_center_(1:n_points,1) + D_center(2,1:n_points) = D_center_(1:n_points,2) + D_center(3,1:n_points) = D_center_(1:n_points,3) + + allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & - fact_a_new(n_points), iorder_a_new(3)) + fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & D_center, A_center, n_points) do ipoint=1,n_points + rvec(ipoint) = 0.d0 + enddo - ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 - accu = 0.d0 - do lx = 0, iorder_a_new(1) - coefx = A_new(lx,1,ipoint) - if(dabs(coefx).lt.thr)cycle - iorder_tmp(1) = lx - do ly = 0, iorder_a_new(2) - coefy = A_new(ly,2,ipoint) - coefxy = coefx * coefy - if(dabs(coefxy).lt.thr)cycle - iorder_tmp(2) = ly - do lz = 0, iorder_a_new(3) - coefz = A_new(lz,3,ipoint) - coefxyz = coefxy * coefz - if(dabs(coefxyz).lt.thr)cycle - iorder_tmp(3) = lz - call overlap_gaussian_xyz(A_center_new(1,ipoint),B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - accu += coefxyz * overlap + do lx = 0, iorder_a_new(1) + iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) + iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) + iorder_tmp(3) = lz + call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) + A_new(lx,1,ipoint) * & + A_new(ly,2,ipoint) * & + A_new(lz,3,ipoint) * overlap(ipoint) enddo enddo enddo - rvec(ipoint) = fact_a_new(ipoint) * accu - end do + enddo + + do ipoint=1,n_points + rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) + enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) end !--- diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index fd9e233b..7bf73c15 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -403,6 +403,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) + + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index e175042b..ef0c4ffc 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -154,7 +154,7 @@ end ! --- subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim, n_points) + power_B,overlap,dim, n_points) implicit none BEGIN_DOC !.. math:: @@ -165,53 +165,57 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& END_DOC include 'constants.include.F' integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions + double precision,intent(in) :: A_center(3,n_points),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions - double precision, intent(out) :: overlap_x(n_points),overlap_y(n_points),overlap_z(n_points),overlap(n_points) - double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p + double precision, intent(out) :: overlap(n_points) double precision :: F_integral_tab(0:max_dim) - integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif + double precision :: p, overlap_x, overlap_y, overlap_z + double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) + integer :: iorder_p(3), ipoint, ldp integer :: nmax double precision :: F_integral + + ldp = max_dim + allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), & + fact_pp(n_points), pp(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + nmax = maxval(iorder_p) - do i = 0,nmax + do i=0, nmax F_integral_tab(i) = F_integral(i,p) enddo - overlap_x = P_new(0,1) * F_integral_tab(0) - overlap_y = P_new(0,2) * F_integral_tab(0) - overlap_z = P_new(0,3) * F_integral_tab(0) integer :: i - do i = 1,iorder_p(1) - overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + + call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) + + do ipoint=1,n_points + if(fact_p(ipoint).lt.1d-20)then + overlap(ipoint) = 1.d-10 + cycle + endif + + overlap_x = P_new(0,1,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(1) + overlap_x = overlap_x + P_new(i,1,ipoint) * F_integral_tab(i) + enddo + + overlap_y = P_new(0,2,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(2) + overlap_y = overlap_y + P_new(i,2,ipoint) * F_integral_tab(i) + enddo + + overlap_z = P_new(0,3,ipoint) * F_integral_tab(0) + do i = 1,iorder_p(3) + overlap_z = overlap_z + P_new(i,3,ipoint) * F_integral_tab(i) + enddo + + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) enddo - call gaussian_product_x(alpha,A_center(1),beta,B_center(1),fact_p,p,P_center(1)) - overlap_x *= fact_p - - do i = 1,iorder_p(2) - overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) - enddo - call gaussian_product_x(alpha,A_center(2),beta,B_center(2),fact_p,p,P_center(2)) - overlap_y *= fact_p - - do i = 1,iorder_p(3) - overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) - enddo - call gaussian_product_x(alpha,A_center(3),beta,B_center(3),fact_p,p,P_center(3)) - overlap_z *= fact_p - - overlap = overlap_x * overlap_y * overlap_z + deallocate(P_new, P_center, fact_p, pp, fact_pp) end ! ---