From 86b4ff44d91ba77d39da56a2b013d000bf921f53 Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Sun, 20 Nov 2022 22:50:14 +0100 Subject: [PATCH] Vectorized int2_grad1u2_grad2u2_j1b2 --- src/ao_many_one_e_ints/listj1b.irp.f | 2 +- .../prim_int_gauss_gauss.irp.f | 19 ++--- src/utils/integration.irp.f | 76 ++++++------------- src/utils/one_e_integration.irp.f | 16 ++-- 4 files changed, 39 insertions(+), 74 deletions(-) diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index ced6f4e9..1178cc31 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -176,8 +176,8 @@ END_PROVIDER enddo - ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 4cba0965..1638aa9e 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,7 +56,7 @@ end !--- -subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) BEGIN_DOC ! Computes the following integral : ! @@ -70,7 +70,7 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B implicit none include 'constants.include.F' integer, intent(in) :: n_points - double precision, intent(in) :: D_center_(n_points,3), delta ! pure gaussian "D" + double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B" integer, intent(in) :: power_A(3),power_B(3) double precision, intent(out) :: rvec(n_points) @@ -92,14 +92,7 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B maxab = maxval(power_A(1:3)) - double precision, allocatable :: D_center(:,:) - allocate(D_center(3,n_points)) - D_center(1,1:n_points) = D_center_(1:n_points,1) - D_center(2,1:n_points) = D_center_(1:n_points,2) - D_center(3,1:n_points) = D_center_(1:n_points,3) - - - allocate (A_new(0:maxab, 3, n_points), A_center_new(3, n_points), & + allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & @@ -118,9 +111,9 @@ subroutine overlap_gauss_r12_v(D_center_,delta,A_center,B_center,power_A,power_B iorder_tmp(3) = lz call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) do ipoint=1,n_points - rvec(ipoint) = rvec(ipoint) + A_new(lx,1,ipoint) * & - A_new(ly,2,ipoint) * & - A_new(lz,3,ipoint) * overlap(ipoint) + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & + A_new(ipoint,ly,2) * & + A_new(ipoint,lz,3) * overlap(ipoint) enddo enddo enddo diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index 29c6d1ff..f68465c7 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -147,12 +147,12 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde integer, intent(in) :: n_points, ldp integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) double precision, intent(in) :: alpha, beta ! exponents - double precision, intent(in) :: A_center(3,n_points) ! A center + double precision, intent(in) :: A_center(n_points,3) ! A center double precision, intent(in) :: B_center (3) ! B center - double precision, intent(out) :: P_center(3,n_points) ! new center + double precision, intent(out) :: P_center(n_points,3) ! new center double precision, intent(out) :: p ! new exponent double precision, intent(out) :: fact_k(n_points) ! constant factor - double precision, intent(out) :: P_new(0:ldp,3,n_points)! polynomial + double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) @@ -161,82 +161,54 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) -!TODO TRANSP -double precision, allocatable :: P_a_(:,:,:), P_b_(:,:,:), A_center_(:,:), P_center_(:,:), P_new_(:,:,:) -allocate(A_center_(n_points,3), P_center_(n_points,3), P_new_(n_points,0:ldp,3)) -A_center_(1:n_points,1) = A_center(1,1:n_points) -A_center_(1:n_points,2) = A_center(2,1:n_points) -A_center_(1:n_points,3) = A_center(3,1:n_points) -P_center_(1:n_points,1) = P_center(1,1:n_points) -P_center_(1:n_points,2) = P_center(2,1:n_points) -P_center_(1:n_points,3) = P_center(3,1:n_points) - if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) - lda = maxval(a) ldb = 0 - allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:0,3)) + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) - - call recentered_poly2_v0(P_a_,lda,A_center_,P_center_,a,P_b_,B_center,P_center_,n_points) + call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) iorder(1:3) = a(1:3) do ipoint=1,n_points do xyz=1,3 - P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) do i=1,a(xyz) - P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo enddo -do ipoint=1,n_points -do i=0,ldp -P_new(i,1,ipoint) = P_new_(ipoint,i,1) -P_new(i,2,ipoint) = P_new_(ipoint,i,2) -P_new(i,3,ipoint) = P_new_(ipoint,i,3) -enddo -enddo return endif lda = maxval(a) ldb = maxval(b) - allocate(P_a_(n_points,0:lda,3), P_b_(n_points,0:ldb,3)) - - call recentered_poly2_v(P_a_,lda,A_center_,P_center_,a,P_b_,ldb,B_center,P_center_,b,n_points) + allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) + call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) iorder(1:3) = a(1:3) + b(1:3) do xyz=1,3 if (b(xyz) == 0) then do ipoint=1,n_points - P_new_(ipoint,0,xyz) = P_a_(ipoint,0,xyz) * P_b_(ipoint,0,xyz) + P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) do i=1,a(xyz) - P_new_(ipoint,i,xyz) = P_new_(ipoint,i,xyz) + P_b_(ipoint,0,xyz) * P_a_(ipoint,i,xyz) + P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo else do i=0,iorder(xyz) do ipoint=1,n_points - P_new_(ipoint,i,xyz) = 0.d0 + P_new(ipoint,i,xyz) = 0.d0 enddo enddo - call multiply_poly_v(P_a_(1,0,xyz), a(xyz),P_b_(1,0,xyz),b(xyz),P_new_(1,0,xyz),ldp,n_points) + call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) endif enddo - -do ipoint=1,n_points -do i=0,ldp -P_new(i,1,ipoint) = P_new_(ipoint,i,1) -P_new(i,2,ipoint) = P_new_(ipoint,i,2) -P_new(i,3,ipoint) = P_new_(ipoint,i,3) -enddo -enddo end !- @@ -353,9 +325,9 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) integer, intent(in) :: n_points double precision, intent(in) :: a,b ! Exponents - double precision, intent(in) :: xa(3,n_points),xb(3) ! Centers + double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers double precision, intent(out) :: p ! New exponent - double precision, intent(out) :: xp(3,n_points) ! New center + double precision, intent(out) :: xp(n_points,3) ! New center double precision, intent(out) :: k(n_points) ! Constant double precision :: p_inv @@ -377,20 +349,20 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) bpxb(3) = bp*xb(3) do ipoint=1,n_points - xab(1) = xa(1,ipoint)-xb(1) - xab(2) = xa(2,ipoint)-xb(2) - xab(3) = xa(3,ipoint)-xb(3) + xab(1) = xa(ipoint,1)-xb(1) + xab(2) = xa(ipoint,2)-xb(2) + xab(3) = xa(ipoint,3)-xb(3) k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3)) if (k(ipoint) > 40.d0) then k(ipoint)=0.d0 - xp(1,ipoint) = 0.d0 - xp(2,ipoint) = 0.d0 - xp(3,ipoint) = 0.d0 + xp(ipoint,1) = 0.d0 + xp(ipoint,2) = 0.d0 + xp(ipoint,3) = 0.d0 else k(ipoint) = dexp(-k(ipoint)) - xp(1,ipoint) = ap*xa(1,ipoint)+bpxb(1) - xp(2,ipoint) = ap*xa(2,ipoint)+bpxb(2) - xp(3,ipoint) = ap*xa(3,ipoint)+bpxb(3) + xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1) + xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2) + xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) endif enddo end subroutine diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 2a32d1b9..a62c657e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -165,7 +165,7 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& END_DOC include 'constants.include.F' integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(3,n_points),B_center(3) ! center of the x1 functions + double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions double precision, intent(out) :: overlap(n_points) @@ -177,7 +177,7 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& double precision :: F_integral ldp = maxval( power_A(1:3) + power_B(1:3) ) - allocate(P_new(0:ldp,3,n_points), P_center(3,n_points), fact_p(n_points), & + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & fact_pp(n_points), pp(n_points)) call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) @@ -197,19 +197,19 @@ subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& cycle endif - overlap_x = P_new(0,1,ipoint) * F_integral_tab(0) + overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) do i = 1,iorder_p(1) - overlap_x = overlap_x + P_new(i,1,ipoint) * F_integral_tab(i) + overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) enddo - overlap_y = P_new(0,2,ipoint) * F_integral_tab(0) + overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) do i = 1,iorder_p(2) - overlap_y = overlap_y + P_new(i,2,ipoint) * F_integral_tab(i) + overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) enddo - overlap_z = P_new(0,3,ipoint) * F_integral_tab(0) + overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) do i = 1,iorder_p(3) - overlap_z = overlap_z + P_new(i,3,ipoint) * F_integral_tab(i) + overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) enddo overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint)