9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2024-12-22 19:43:32 +01:00

Vectorizing integrals.
Some checks failed
continuous-integration/drone/push Build is failing

This commit is contained in:
Anthony Scemama 2022-11-21 02:13:43 +01:00
parent 80b01d5947
commit dddd409008
3 changed files with 466 additions and 75 deletions

View File

@ -274,7 +274,82 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
end subroutine NAI_pol_x_mult_erf_ao
! ---
subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
BEGIN_DOC
!
! Computes the following integral :
!
! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
END_DOC
include 'utils/constants.include.F'
implicit none
integer, intent(in) :: i_ao, j_ao, n_points
double precision, intent(in) :: mu_in, C_center(n_points,3)
double precision, intent(out) :: ints(n_points,3)
integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
integer :: power_xA(3), m, ipoint
double precision :: A_center(3), B_center(3), alpha, beta, coef
double precision, allocatable :: integral(:)
double precision :: NAI_pol_mult_erf
ints = 0.d0
if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then
return
endif
num_A = ao_nucl(i_ao)
power_A(1:3) = ao_power(i_ao,1:3)
A_center(1:3) = nucl_coord(num_A,1:3)
num_B = ao_nucl(j_ao)
power_B(1:3) = ao_power(j_ao,1:3)
B_center(1:3) = nucl_coord(num_B,1:3)
n_pt_in = n_pt_max_integrals
allocate(integral(n_points))
do i = 1, ao_prim_num(i_ao)
alpha = ao_expo_ordered_transp(i,i_ao)
do m = 1, 3
power_xA = power_A
! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
power_xA(m) += 1
do j = 1, ao_prim_num(j_ao)
beta = ao_expo_ordered_transp(j,j_ao)
coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
! First term = (x-Ax)**(ax+1)
call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
do ipoint=1,n_points
ints(ipoint,m) += integral(ipoint) * coef
enddo
! Second term = Ax * (x-Ax)**(ax)
call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
do ipoint=1,n_points
ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
enddo
enddo
enddo
enddo
deallocate(integral)
end subroutine NAI_pol_x_mult_erf_ao_v
! ---
subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)
BEGIN_DOC
@ -351,6 +426,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
end subroutine NAI_pol_x_mult_erf_ao_with1s
!--
subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points)
BEGIN_DOC
!
! Computes the following integral :
!
! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
!
END_DOC
include 'utils/constants.include.F'
implicit none
integer, intent(in) :: i_ao, j_ao, n_points
double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3)
double precision, intent(out) :: ints(n_points,3)
integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
integer :: ipoint
double precision, allocatable :: integral(:)
if(beta .lt. 1d-10) then
call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
return
endif
ints(:,:) = 0.d0
if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
return
endif
power_Ai(1:3) = ao_power(i_ao,1:3)
power_Aj(1:3) = ao_power(j_ao,1:3)
Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
n_pt_in = n_pt_max_integrals
allocate(integral(n_points))
do i = 1, ao_prim_num(i_ao)
alphai = ao_expo_ordered_transp (i,i_ao)
coefi = ao_coef_normalized_ordered_transp(i,i_ao)
do m = 1, 3
! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
power_xA = power_Ai
power_xA(m) += 1
do j = 1, ao_prim_num(j_ao)
alphaj = ao_expo_ordered_transp (j,j_ao)
coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
! First term = (x-Ax)**(ax+1)
call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, &
alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
do ipoint = 1, n_points
ints(ipoint,m) += integral(ipoint) * coef
enddo
! Second term = Ax * (x-Ax)**(ax)
call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, &
alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
do ipoint = 1, n_points
ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
enddo
enddo
enddo
enddo
deallocate(integral)
end subroutine NAI_pol_x_mult_erf_ao_with1s
! ---
subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)

View File

@ -176,35 +176,40 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
implicit none
integer :: i, j, ipoint, i_1s, i_fit
double precision :: r(3), int_fit(3), expo_fit, coef_fit
double precision :: coef, beta, B_center(3), dist
double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
double precision :: r(3), expo_fit, coef_fit
double precision :: coef, beta, B_center(3)
double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp
double precision :: tmp_x, tmp_y, tmp_z
double precision :: wall0, wall1
double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:)
provide mu_erf final_grid_points_transp j1b_pen
call wall_time(wall0)
int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
!$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, &
!$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, &
!$OMP tmp_x, tmp_y, tmp_z) &
!$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, &
!$OMP final_grid_points_transp, n_max_fit_slat, &
!$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, &
!$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, &
!$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
!$OMP DO
allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3))
do ipoint = 1, n_points_final_grid
r(1) = final_grid_points_transp(ipoint,1)
r(2) = final_grid_points_transp(ipoint,2)
r(3) = final_grid_points_transp(ipoint,3)
do i = 1, ao_num
do j = i, ao_num
dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+ (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+ (B_center(3) - r(3)) * (B_center(3) - r(3))
enddo
int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
!$OMP PARALLEL DEFAULT (NONE) &
!$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
!$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, &
!$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, &
!$OMP tmp_x, tmp_y, tmp_z) &
!$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,&
!$OMP final_grid_points_transp, n_max_fit_slat, dist, &
!$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, &
!$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, &
!$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
allocate(int_fit_v(n_points_final_grid,3))
do i_1s = 1, List_all_comb_b3_size
@ -213,39 +218,53 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
B_center(1) = List_all_comb_b3_cent(1,i_1s)
B_center(2) = List_all_comb_b3_cent(2,i_1s)
B_center(3) = List_all_comb_b3_cent(3,i_1s)
dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+ (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+ (B_center(3) - r(3)) * (B_center(3) - r(3))
do i_fit = 1, n_max_fit_slat
expo_fit = expo_gauss_j_mu_1_erf(i_fit)
coef_fit = coef_gauss_j_mu_1_erf(i_fit)
coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef
alpha_1s = beta + expo_fit
alpha_1s_inv = 1.d0 / alpha_1s
centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
do ipoint = 1, n_points_final_grid
r(1) = final_grid_points_transp(ipoint,1)
r(2) = final_grid_points_transp(ipoint,2)
r(3) = final_grid_points_transp(ipoint,3)
expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
enddo
call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
expo_coef_1s = beta * expo_fit * alpha_1s_inv
!$OMP BARRIER
!$OMP DO SCHEDULE(dynamic)
do i = 1, ao_num
do j = i, ao_num
call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,&
1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid)
int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_tmp * int_fit(1)
int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_tmp * int_fit(2)
int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_tmp * int_fit(3)
do ipoint = 1, n_points_final_grid
coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint))
int2_u_grad1u_x_j1b2(1,j,i,ipoint) = &
int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1)
int2_u_grad1u_x_j1b2(2,j,i,ipoint) = &
int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2)
int2_u_grad1u_x_j1b2(3,j,i,ipoint) = &
int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3)
enddo
enddo
enddo
!$OMP END DO NOWAIT
enddo
enddo
enddo
!$OMP END DO
deallocate(int_fit_v)
!$OMP END PARALLEL
deallocate(dist)
do ipoint = 1, n_points_final_grid
do i = 2, ao_num
do j = 1, i-1

View File

@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
end function NAI_pol_mult_erf
! ---
subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points)
BEGIN_DOC
!
! Computes the following integral :
!
! .. math::
!
! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
!
END_DOC
include 'utils/constants.include.F'
implicit none
integer, intent(in) :: n_pt_in, n_points
integer, intent(in) :: power_A(3), power_B(3)
double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in
double precision, intent(out) :: res_v(n_points)
integer :: i, n_pt, n_pt_out, ipoint
double precision :: P_center(3)
double precision :: d(0:n_pt_in), coeff, dist, const, factor
double precision :: const_factor, dist_integral
double precision :: accu, p_inv, p, rho, p_inv_2
double precision :: p_new
double precision :: rint
p = alpha + beta
p_inv = 1.d0 / p
p_inv_2 = 0.5d0 * p_inv
rho = alpha * beta * p_inv
p_new = mu_in / dsqrt(p + mu_in * mu_in)
dist = 0.d0
do i = 1, 3
P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
enddo
do ipoint=1,n_points
dist_integral = 0.d0
do i = 1, 3
dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
enddo
const_factor = dist * rho
if(const_factor > 80.d0) then
res_V(ipoint) = 0.d0
cycle
endif
factor = dexp(-const_factor)
coeff = dtwo_pi * factor * p_inv * p_new
n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
const = p * dist_integral * p_new * p_new
if(n_pt == 0) then
res_v(ipoint) = coeff * rint(0, const)
cycle
endif
do i = 0, n_pt_in
d(i) = 0.d0
enddo
p_new = p_new * p_new
call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)&
, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
if(n_pt_out < 0) then
res_v(ipoint) = 0.d0
cycle
endif
! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i
accu = 0.d0
do i = 0, n_pt_out, 2
accu += d(i) * rint(i/2, const)
enddo
res_v(ipoint) = accu * coeff
enddo
end
! ---
double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
end function NAI_pol_mult_erf_with1s
!--
subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2&
, beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points)
BEGIN_DOC
!
! Computes the following integral :
!
! .. math ::
!
! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
! \exp(-\beta (r - B)^2)
! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
!
END_DOC
include 'utils/constants.include.F'
implicit none
integer, intent(in) :: n_pt_in, n_points
integer, intent(in) :: power_A1(3), power_A2(3)
double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3)
double precision, intent(in) :: alpha1, alpha2, beta, mu_in
double precision, intent(out) :: res_v(n_points)
integer :: i, n_pt, n_pt_out, ipoint
double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
double precision :: dist_integral
double precision :: d(0:n_pt_in), coeff, const, factor
double precision :: accu
double precision :: p_new, p_new2
double precision :: rint
! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
alpha12 = alpha1 + alpha2
alpha12_inv = 1.d0 / alpha12
alpha12_inv_2 = 0.5d0 * alpha12_inv
rho12 = alpha1 * alpha2 * alpha12_inv
A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+ (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+ (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
const_factor12 = dist12 * rho12
if(const_factor12 > 80.d0) then
res_v(:) = 0.d0
return
endif
! ---
! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
p = alpha12 + beta
p_inv = 1.d0 / p
p_inv_2 = 0.5d0 * p_inv
rho = alpha12 * beta * p_inv
p_new = mu_in / dsqrt(p + mu_in * mu_in)
p_new2 = p_new * p_new
n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) &
+ power_A1(3) + power_A2(3) )
do ipoint=1,n_points
P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+ (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+ (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
const_factor = const_factor12 + dist * rho
if(const_factor > 80.d0) then
res_v(ipoint) = 0.d0
cycle
endif
dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+ (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+ (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
! ---
factor = dexp(-const_factor)
coeff = dtwo_pi * factor * p_inv * p_new
const = p * dist_integral * p_new2
if(n_pt == 0) then
res_v(ipoint) = coeff * rint(0, const)
cycle
endif
do i = 0, n_pt_in
d(i) = 0.d0
enddo
!TODO: VECTORIZE HERE
call give_polynomial_mult_center_one_e_erf_opt( &
A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)&
, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1)
if(n_pt_out < 0) then
res_v(ipoint) = 0.d0
cycle
endif
! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i
accu = 0.d0
do i = 0, n_pt_out, 2
accu += d(i) * rint(i/2, const)
enddo
res_v(ipoint) = accu * coeff
end do
end
! ---
! ---
subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center &
@ -432,6 +643,7 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt
! ---
subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
BEGIN_DOC