From dddd4090085e0970d77ab7337a3fdc683e0099f2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 21 Nov 2022 02:13:43 +0100
Subject: [PATCH] Vectorizing integrals.

---
 src/ao_many_one_e_ints/ao_erf_gauss.irp.f    | 210 +++++++++++++++---
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f | 113 ++++++----
 src/ao_one_e_ints/pot_ao_erf_ints.irp.f      | 218 ++++++++++++++++++-
 3 files changed, 466 insertions(+), 75 deletions(-)

diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
index fe25e9c0..4fc9ad6b 100644
--- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@@ -19,11 +19,11 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
   return
  endif
  n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
  num_A = ao_nucl(j)
  power_A(1:3)= ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
  num_B = ao_nucl(i)
  power_B(1:3)= ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
@@ -33,19 +33,19 @@ subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
   do m=1,ao_prim_num(i)
     beta = ao_expo_ordered_transp(m,i)
     do mm = 1, 3
-     ! (x phi_i ) * phi_j 
+     ! (x phi_i ) * phi_j
      ! x * (x - B_x)^b_x = b_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
      !
      ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
      power_B_tmp = power_B
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
      xyz_ints(mm) += contrib * B_center(mm) * ao_coef_normalized_ordered_transp(l,j)             &
-                                            * ao_coef_normalized_ordered_transp(m,i) 
-     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 
+                                            * ao_coef_normalized_ordered_transp(m,i)
+     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1
      power_B_tmp(mm) += 1
-     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)  
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
      xyz_ints(mm) += contrib * 1.d0        * ao_coef_normalized_ordered_transp(l,j)             &
-                                           * ao_coef_normalized_ordered_transp(m,i) 
+                                           * ao_coef_normalized_ordered_transp(m,i)
     enddo
   enddo
  enddo
@@ -58,7 +58,7 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
   BEGIN_DOC
   ! phi_j_erf_mu_r_phi  = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  phi_i(r)
   END_DOC
- 
+
   implicit none
   integer,          intent(in) :: i,j
   double precision, intent(in) :: mu_in, C_center(3)
@@ -77,24 +77,24 @@ double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
 
   n_pt_in = n_pt_max_integrals
 
-  ! j 
+  ! j
   num_A         = ao_nucl(j)
   power_A(1:3)  = ao_power(j,1:3)
   A_center(1:3) = nucl_coord(num_A,1:3)
 
-  ! i 
+  ! i
   num_B         = ao_nucl(i)
   power_B(1:3)  = ao_power(i,1:3)
   B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
   do l = 1, ao_prim_num(j)
    alpha = ao_expo_ordered_transp(l,j)
    do m = 1, ao_prim_num(i)
      beta = ao_expo_ordered_transp(m,i)
 
-     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)  
+     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
 
-     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i) 
+     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i)
    enddo
   enddo
 
@@ -124,11 +124,11 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
   return
  endif
  n_pt_in = n_pt_max_integrals
- ! j 
+ ! j
  num_A = ao_nucl(j)
  power_A(1:3)= ao_power(j,1:3)
  A_center(1:3) = nucl_coord(num_A,1:3)
- ! i 
+ ! i
  num_B = ao_nucl(i)
  power_B(1:3)= ao_power(i,1:3)
  B_center(1:3) = nucl_coord(num_B,1:3)
@@ -141,7 +141,7 @@ subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
     call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
     do mm = 1, 4
      gauss_ints(mm) += xyz_ints(mm)  * ao_coef_normalized_ordered_transp(l,j)             &
-                                     * ao_coef_normalized_ordered_transp(m,i) 
+                                     * ao_coef_normalized_ordered_transp(m,i)
     enddo
   enddo
  enddo
@@ -161,7 +161,7 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
   integer,          intent(in)  :: i, j
   double precision, intent(in)  :: mu, C_center(3), delta
   double precision, intent(out) :: gauss_ints
- 
+
   integer                       :: n_pt_in, l, m
   integer                       :: num_A, power_A(3), num_b, power_B(3)
   double precision              :: alpha, beta, A_center(3), B_center(3), coef
@@ -177,16 +177,16 @@ subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
 
   n_pt_in = n_pt_max_integrals
 
-  ! j 
+  ! j
   num_A         = ao_nucl(j)
   power_A(1:3)  = ao_power(j,1:3)
   A_center(1:3) = nucl_coord(num_A,1:3)
 
-  ! i 
+  ! i
   num_B         = ao_nucl(i)
   power_B(1:3)  = ao_power(i,1:3)
   B_center(1:3) = nucl_coord(num_B,1:3)
- 
+
   do l = 1, ao_prim_num(j)
    alpha = ao_expo_ordered_transp(l,j)
    do m = 1, ao_prim_num(i)
@@ -219,7 +219,7 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
   !
   END_DOC
 
-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'
 
   implicit none
 
@@ -274,7 +274,82 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
 end subroutine NAI_pol_x_mult_erf_ao
 
 ! ---
+subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
 
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, n_points
+  double precision, intent(in)  :: mu_in, C_center(n_points,3)
+  double precision, intent(out) :: ints(n_points,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
+  integer                       :: power_xA(3), m, ipoint
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+  double precision              :: NAI_pol_mult_erf
+
+  ints = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then
+    return
+  endif
+
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      power_xA = power_A
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint=1,n_points
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v
+
+! ---
 subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)
 
   BEGIN_DOC
@@ -289,7 +364,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
   !
   END_DOC
 
-  include 'utils/constants.include.F'                                                                                                                                  
+  include 'utils/constants.include.F'
 
   implicit none
 
@@ -333,7 +408,7 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
 
       do j = 1, ao_prim_num(j_ao)
         alphaj = ao_expo_ordered_transp                   (j,j_ao)
-        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
 
         ! First term = (x-Ax)**(ax+1)
         integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
@@ -351,6 +426,91 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen
 
 end subroutine NAI_pol_x_mult_erf_ao_with1s
 
+!--
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)   :: i_ao, j_ao, n_points
+  double precision, intent(in)   :: beta, B_center(n_points,3), mu_in, C_center(n_points,3)
+  double precision, intent(out)  :: ints(n_points,3)
+
+  integer                        :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision               :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                        :: ipoint
+  double precision, allocatable :: integral(:)
+
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points)
+    return
+  endif
+
+  ints(:,:) = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  allocate(integral(n_points))
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, &
+            alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, &
+          alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s
+
+
 ! ---
 
 subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
@@ -361,7 +521,7 @@ subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
   !
   ! if m == 1 X(m) = x, m == 1 X(m) = y, m == 1 X(m) = z
   END_DOC
- include 'utils/constants.include.F'                                                                                                                                  
+ include 'utils/constants.include.F'
  integer, intent(in) :: i_ao,j_ao,m
  double precision, intent(in) :: mu_in, C_center(3)
  double precision, intent(out):: ints
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index d049ec22..cdc86456 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -175,76 +175,95 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   END_DOC
 
   implicit none
-  integer          :: i, j, ipoint, i_1s, i_fit
-  double precision :: r(3), int_fit(3), expo_fit, coef_fit
-  double precision :: coef, beta, B_center(3), dist
-  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
-  double precision :: tmp_x, tmp_y, tmp_z
-  double precision :: wall0, wall1
+  integer                        :: i, j, ipoint, i_1s, i_fit
+  double precision               :: r(3), expo_fit, coef_fit
+  double precision               :: coef, beta, B_center(3)
+  double precision               :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp
+  double precision               :: tmp_x, tmp_y, tmp_z
+  double precision               :: wall0, wall1
+  double precision, allocatable  :: int_fit_v(:,:), dist(:), centr_1s(:,:)
 
   provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
-  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     &
- !$OMP          tmp_x, tmp_y, tmp_z)                                &
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
- !$OMP          final_grid_points_transp, n_max_fit_slat,           &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
- !$OMP DO
+  allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3))
   do ipoint = 1, n_points_final_grid
     r(1) = final_grid_points_transp(ipoint,1)
     r(2) = final_grid_points_transp(ipoint,2)
     r(3) = final_grid_points_transp(ipoint,3)
 
-    do i = 1, ao_num
-      do j = i, ao_num
+    dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1))  &
+                 + (B_center(2) - r(2)) * (B_center(2) - r(2))  &
+                 + (B_center(3) - r(3)) * (B_center(3) - r(3))
+  enddo
 
-        do i_1s = 1, List_all_comb_b3_size
+  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
 
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+  !$OMP PARALLEL DEFAULT (NONE)                                      &
+      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+      !$OMP          coef_fit, expo_fit, int_fit_v, alpha_1s,        &
+      !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, &
+      !$OMP          tmp_x, tmp_y, tmp_z)                            &
+      !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+      !$OMP          final_grid_points_transp, n_max_fit_slat, dist, &
+      !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,   &
+      !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,   &
+      !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+  allocate(int_fit_v(n_points_final_grid,3))
 
-          do i_fit = 1, n_max_fit_slat
+  do i_1s = 1, List_all_comb_b3_size
 
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    coef        = List_all_comb_b3_coef  (i_1s)
+    beta        = List_all_comb_b3_expo  (i_1s)
+    B_center(1) = List_all_comb_b3_cent(1,i_1s)
+    B_center(2) = List_all_comb_b3_cent(2,i_1s)
+    B_center(3) = List_all_comb_b3_cent(3,i_1s)
 
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s
+    do i_fit = 1, n_max_fit_slat
 
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+      coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef
 
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+      alpha_1s     = beta + expo_fit
+      alpha_1s_inv = 1.d0 / alpha_1s
 
-            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+      do ipoint = 1, n_points_final_grid
+        r(1) = final_grid_points_transp(ipoint,1)
+        r(2) = final_grid_points_transp(ipoint,2)
+        r(3) = final_grid_points_transp(ipoint,3)
 
-            int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_tmp * int_fit(1)
-            int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_tmp * int_fit(2)
-            int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_tmp * int_fit(3)
+        centr_1s(ipoint,1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+        centr_1s(ipoint,2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+        centr_1s(ipoint,3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+      enddo
+
+      expo_coef_1s = beta * expo_fit * alpha_1s_inv 
+      !$OMP BARRIER
+      !$OMP DO SCHEDULE(dynamic)
+      do i = 1, ao_num
+        do j = i, ao_num
+          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,&
+              1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid)
+
+          do ipoint = 1, n_points_final_grid
+            coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint))
+            int2_u_grad1u_x_j1b2(1,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1)
+            int2_u_grad1u_x_j1b2(2,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2)
+            int2_u_grad1u_x_j1b2(3,j,i,ipoint) = &
+              int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3)
           enddo
         enddo
-
       enddo
+      !$OMP END DO NOWAIT
+
     enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
+  deallocate(int_fit_v)
+  !$OMP END PARALLEL
+
+  deallocate(dist)
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
index 263e9845..96625df5 100644
--- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
@@ -124,7 +124,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
   !
@@ -197,6 +197,92 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B,
 
 end function NAI_pol_mult_erf
 
+! ---
+subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in, n_points
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in
+  double precision, intent(out) :: res_v(n_points)
+
+  integer                      :: i, n_pt, n_pt_out, ipoint
+  double precision             :: P_center(3)
+  double precision             :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision             :: const_factor, dist_integral
+  double precision             :: accu, p_inv, p, rho, p_inv_2
+  double precision             :: p_new
+
+  double precision             :: rint
+
+  p       = alpha + beta
+  p_inv   = 1.d0 / p
+  p_inv_2 = 0.5d0 * p_inv
+  rho     = alpha * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+
+  dist          = 0.d0
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+  enddo
+
+  do ipoint=1,n_points
+    dist_integral = 0.d0
+    do i = 1, 3
+      dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+    enddo
+    const_factor = dist * rho
+    if(const_factor > 80.d0) then
+      res_V(ipoint) = 0.d0
+      cycle
+    endif
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    n_pt  =  2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
+    const = p * dist_integral * p_new * p_new
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+    p_new = p_new * p_new
+    call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  enddo
+
+end
+
 ! ---
 
 double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
@@ -207,7 +293,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
   !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
   !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
@@ -312,6 +398,131 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A
 
 end function NAI_pol_mult_erf_with1s
 
+!--
+
+subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2&
+      , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in)   :: n_pt_in, n_points
+  integer,          intent(in)   :: power_A1(3), power_A2(3)
+  double precision, intent(in)   :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3)
+  double precision, intent(in)   :: alpha1, alpha2, beta, mu_in
+  double precision, intent(out)  :: res_v(n_points)
+
+  integer                        :: i, n_pt, n_pt_out, ipoint
+  double precision               :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision               :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision               :: dist_integral
+  double precision               :: d(0:n_pt_in), coeff, const, factor
+  double precision               :: accu
+  double precision               :: p_new, p_new2
+
+  double precision               :: rint
+
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+      + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+      + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+
+  if(const_factor12 > 80.d0) then
+    res_v(:) = 0.d0
+    return
+  endif
+
+    ! ---
+
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p           = alpha12 + beta
+  p_inv       = 1.d0 / p
+  p_inv_2     = 0.5d0 * p_inv
+  rho         = alpha12 * beta * p_inv
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2 = p_new * p_new
+  n_pt  =  2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) &
+              + power_A1(3) + power_A2(3) )
+
+  do ipoint=1,n_points
+
+    P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+    P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+    P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+    dist   = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+        + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+        + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
+
+    const_factor = const_factor12 + dist * rho
+    if(const_factor > 80.d0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+        + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+        + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+
+    ! ---
+
+    factor = dexp(-const_factor)
+    coeff  = dtwo_pi * factor * p_inv * p_new
+
+    const = p * dist_integral * p_new2
+    if(n_pt == 0) then
+      res_v(ipoint) = coeff * rint(0, const)
+      cycle
+    endif
+
+    do i = 0, n_pt_in
+      d(i) = 0.d0
+    enddo
+
+    !TODO: VECTORIZE HERE
+    call give_polynomial_mult_center_one_e_erf_opt(                  &
+        A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)&
+        , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1)
+
+    if(n_pt_out < 0) then
+      res_v(ipoint) = 0.d0
+      cycle
+    endif
+
+    ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+    accu = 0.d0
+    do i = 0, n_pt_out, 2
+      accu += d(i) * rint(i/2, const)
+    enddo
+    res_v(ipoint) = accu * coeff
+  end do
+
+end
+
+! ---
 ! ---
 
 subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center &
@@ -432,10 +643,11 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt
 ! ---
 
 
+
 subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
       power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
   BEGIN_DOC
-  ! Returns the explicit polynomial in terms of the $t$ variable of the 
+  ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.