From 5358bd9a612cca00ac969b5d2bb67038159a4e01 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sat, 10 Dec 2022 15:23:50 +0100
Subject: [PATCH 1/9] working on the grad squared

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 428 +++++++++++++-----
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |  14 +-
 .../grad_lapl_jmu_manu.irp.f                  |  12 +-
 .../grad_lapl_jmu_modif.irp.f                 |   6 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  90 ++--
 src/non_h_ints_mu/grad_squared.irp.f          |   1 +
 src/non_h_ints_mu/grad_squared_manu.irp.f     | 134 ++++++
 src/non_h_ints_mu/new_grad_tc.irp.f           |   2 +
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      | 146 ++++++
 src/tc_scf/test_int.irp.f                     | 287 ++++++++++--
 10 files changed, 921 insertions(+), 199 deletions(-)
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index c25d8055..7367df88 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -1,110 +1,4 @@
 
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
-  !
-  END_DOC
-
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
-  double precision              :: coef, beta, B_center(3), dist
-  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
-  double precision              :: wall0, wall1
-  double precision, external    :: NAI_pol_mult_erf_ao_with1s
-  double precision :: j12_mu_r12,int_j1b
-  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
-  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
-
-  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
-  call wall_time(wall0)
-
-
-  int2_u_grad1u_j1b2_test = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
- !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr, &
- !$OMP          final_grid_points, ng_fit_jast,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
- !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
- !$OMP DO
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
-        r(1) = final_grid_points(1,ipoint)
-        r(2) = final_grid_points(2,ipoint)
-        r(3) = final_grid_points(3,ipoint)
-
-        tmp = 0.d0
-        do i_1s = 1, List_comb_b3_size_thr(j,i)
-
-          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
-          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
-          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
-          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
-          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
-          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
-
-          do i_fit = 1, ng_fit_jast
-
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
-
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
-
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            if(expo_coef_1s .gt. 20.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-08) cycle
-
-            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
-
-            tmp += coef_tmp * int_fit
-          enddo
-        enddo
-
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
-
-END_PROVIDER
-
-! ---
-
 BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num, ao_num, n_points_final_grid)]
 
   BEGIN_DOC
@@ -133,7 +27,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s)                &
-     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_b3_size_thr,&
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,&
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
@@ -150,7 +44,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          cycle
        endif
   
-       do i_1s = 1, List_comb_b3_size_thr(j,i)
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -222,7 +116,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
-     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr,&
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
@@ -238,7 +132,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
        cycle
      endif
 
-      do i_1s = 1, List_comb_b3_size_thr(j,i)
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -282,3 +176,317 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+          
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+          
+            ! ---
+          
+              int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          
+              tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b)                        & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+    
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    
+            ! ---
+    
+!            call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit)
+!            tmp_x += coef_fit * int_fit(1)
+!            tmp_y += coef_fit * int_fit(2)
+!            tmp_z += coef_fit * int_fit(3)
+!            if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+
+          ! ---
+
+
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-10) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = tmp_x
+        int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = tmp_y
+        int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(1,i,j,ipoint)
+        int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(2,i,j,ipoint)
+        int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index 872bfaef..5cd2aac6 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += -0.25d0 * coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -241,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
           tmp_x += coef_fit * int_fit(1)
           tmp_y += coef_fit * int_fit(2)
           tmp_z += coef_fit * int_fit(3)
-          if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
 
           ! ---
 
@@ -265,7 +265,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -351,7 +351,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-!          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +375,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-!            if(expo_coef_1s .gt. 80.d0) cycle
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-!            if(dabs(coef_tmp) .lt. 1d-10) cycle
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
 
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 1b457d68..382f6351 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
 
  !$OMP PARALLEL DEFAULT (NONE)                                                         &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points, &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
  !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
@@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
         if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
 
         tmp = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
@@ -129,7 +129,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
  !$OMP          int_j1b, tmp_x, tmp_y, tmp_z)                                                  & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points,&
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
@@ -147,7 +147,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
         tmp_x = 0.d0
         tmp_y = 0.d0
         tmp_z = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
@@ -223,7 +223,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
  !$OMP SHARED  (n_points_final_grid, ao_num,  & 
  !$OMP          final_grid_points, ng_fit_jast,                  &
  !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
- !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_b2_size_thr,       & 
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
  !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
  !$OMP DO
@@ -238,7 +238,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
         if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
 
         tmp = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
index 6a662533..8fff961b 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
 
         int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
         int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-        if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
 
         tmp += coef * (int_mu - int_coulomb)
 
@@ -169,7 +169,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
-        if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
 
         tmp_x += coef * (ints(1) - ints_coulomb(1))
         tmp_y += coef * (ints(2) - ints_coulomb(2))
@@ -277,7 +277,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
           B_center(3) = List_all_comb_b2_cent(3,1)
 
           int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
 
           tmp += coef * coef_fit * int_fit
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 606664f8..934ccab1 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -1,70 +1,70 @@
 
- BEGIN_PROVIDER [ integer, List_comb_b2_size_thr, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ integer, max_List_comb_b2_size_thr]
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
  implicit none
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
- List_comb_b2_size_thr = 0
+ thr = 1.d-12
+ List_comb_thr_b2_size = 0
  do i = 1, ao_num
   do j = i, ao_num
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-10)cycle
+     if(dabs(coef).lt.1.d-12)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
-     beta = max(beta,1.d-10)
+     beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
       int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
-      List_comb_b2_size_thr(j,i) += 1
+      List_comb_thr_b2_size(j,i) += 1
      endif
    enddo
   enddo 
  enddo
  do i = 1, ao_num
   do j = 1, i-1
-    List_comb_b2_size_thr(j,i) = List_comb_b2_size_thr(i,j)
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
   enddo
  enddo
  integer :: list(ao_num)
  do i = 1, ao_num
-  list(i) = maxval(List_comb_b2_size_thr(:,i))
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
  enddo
- max_List_comb_b2_size_thr = maxval(list) 
+ max_List_comb_thr_b2_size = maxval(list) 
  
 END_PROVIDER 
 
- BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_b2_size_thr ,ao_num, ao_num)]
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
  implicit none
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
+ thr = 1.d-12
  ao_abs_comb_b2_j1b = 10000000.d0
  do i = 1, ao_num
   do j = i, ao_num
    icount = 0
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-10)cycle
+     if(dabs(coef).lt.1.d-12)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
@@ -83,7 +83,7 @@ END_PROVIDER
 
  do i = 1, ao_num
   do j = 1, i-1
-    do icount = 1, List_comb_b2_size_thr(j,i)
+    do icount = 1, List_comb_thr_b2_size(j,i)
      List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
      List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
      List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
@@ -94,14 +94,14 @@ END_PROVIDER
 END_PROVIDER 
 
 
- BEGIN_PROVIDER [ integer, List_comb_b3_size_thr, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ integer, max_List_comb_b3_size_thr]
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
  implicit none
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
- List_comb_b3_size_thr = 0
+ thr = 1.d-12
+ List_comb_thr_b3_size = 0
  do i = 1, ao_num
   do j = 1, ao_num
    do i_1s = 1, List_all_comb_b3_size
@@ -110,43 +110,43 @@ END_PROVIDER
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
       int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
-      List_comb_b3_size_thr(j,i) += 1
+      List_comb_thr_b3_size(j,i) += 1
      endif
    enddo
   enddo 
  enddo
 ! do i = 1, ao_num
 !  do j = 1, i-1
-!    List_comb_b3_size_thr(j,i) = List_comb_b3_size_thr(i,j)
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
 !  enddo
 ! enddo
  integer :: list(ao_num)
  do i = 1, ao_num
-  list(i) = maxval(List_comb_b3_size_thr(:,i))
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
  enddo
- max_List_comb_b3_size_thr = maxval(list) 
- print*,'max_List_comb_b3_size_thr =  ',max_List_comb_b3_size_thr
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
  
 END_PROVIDER 
 
- BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_b3_size_thr ,ao_num, ao_num)]
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
  implicit none
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
+ thr = 1.d-12
  ao_abs_comb_b3_j1b = 10000000.d0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -154,13 +154,13 @@ END_PROVIDER
    do i_1s = 1, List_all_comb_b3_size
      coef        = List_all_comb_b3_coef  (i_1s)
      beta        = List_all_comb_b3_expo  (i_1s)
-     beta = max(beta,1.d-10)
+     beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
@@ -179,7 +179,7 @@ END_PROVIDER
 
 ! do i = 1, ao_num
 !  do j = 1, i-1
-!    do icount = 1, List_comb_b3_size_thr(j,i)
+!    do icount = 1, List_comb_thr_b3_size(j,i)
 !     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
 !     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
 !     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..c941b427 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -290,6 +290,7 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_
 
 END_PROVIDER 
 
+! ---
 ! ---
 
 BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ]
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..c4189535
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,134 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+  allocate(bc_mat(ao_num,ao_num,ao_num,ao_num))
+  bc_mat = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+
+      do k = 1, ao_num
+        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+            ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
+            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12(l,j,ipoint)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+  deallocate(bc_mat)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = tmp1 * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..e05492f0 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -82,6 +82,7 @@ END_PROVIDER
 
 ! ---
 
+
 BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
@@ -149,3 +150,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..79d845e8
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,146 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,i,j,ipoint) - tmp2 * tmp_x
+          int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,i,j,ipoint) - tmp2 * tmp_y
+          int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,i,j,ipoint) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
+          int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
+          int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao_test *= 0.5d0
+
+  endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision, allocatable :: ac_mat(:,:,:,:)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+
+      do k = 1, ao_num
+        ao_k_r = aos_in_r_array_transp(ipoint,k)
+
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+
+            contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint) * tmp_x 
+            contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint) * tmp_y 
+            contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint) * tmp_z 
+
+            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ 
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+END_PROVIDER 
+
+! ---
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 6961d2f0..545dec48 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -9,20 +9,37 @@ program test_ints
   print *, 'starting ...'
 
   my_grid_becke  = .True.
-!  my_n_pt_r_grid = 30
+  my_n_pt_r_grid = 10
 !  my_n_pt_a_grid = 50
-  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 26 ! small grid for quick debug
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
- call routine_int2_u_grad1u_j1b2 
- call routine_v_ij_erf_rk_cst_mu_j1b
- call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
- call routine_v_ij_u_cst_mu_j1b
 
-!
-! call routine_test_j1b
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
 
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
 ! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+ call test_total_grad_square
+
 end
 
 subroutine routine_test_j1b
@@ -42,7 +59,7 @@ subroutine routine_test_j1b
  print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
  do i = 1, ao_num
   do j = 1, ao_num
-   do icount = 1, List_comb_b3_size_thr(j,i)
+   do icount = 1, List_comb_thr_b3_size(j,i)
     print*,'',j,i
     print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
     print*,List_comb_thr_b3_cent(1:3,icount,j,i)
@@ -51,7 +68,7 @@ subroutine routine_test_j1b
 !   enddo
   enddo
  enddo
- print*,'max_List_comb_b3_size_thr = ',max_List_comb_b3_size_thr,List_all_comb_b3_size
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
 
 end
 
@@ -221,7 +238,7 @@ end
 
 
 
-subroutine routine_v_ij_u_cst_mu_j1b
+subroutine routine_v_ij_u_cst_mu_j1b_test
  implicit none
  integer :: i,j,ipoint,k,l
  double precision :: weight,accu_relat, accu_abs, contrib
@@ -286,13 +303,13 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
  double precision, allocatable :: ints(:,:,:)
  allocate(ints(ao_num, ao_num, n_points_final_grid))
- do ipoint = 1, n_points_final_grid
-  do i = 1, ao_num
-   do j = 1, ao_num
-    read(33,*)ints(j,i,ipoint)
-   enddo
-  enddo
- enddo
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -306,18 +323,18 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
      do j = 1, ao_num
       array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!     !array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
-       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
-        print*,j,i,ipoint
-        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
 !        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
-        stop
-       endif
-      endif
+!        stop
+!       endif
+!      endif
      enddo
     enddo
    enddo
@@ -343,4 +360,218 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
 
   
 
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2(m,j,i,ipoint)      * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
 end

From ce79917152da002f47ab2052cd9b85646d610aec Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sat, 10 Dec 2022 17:40:17 +0100
Subject: [PATCH 2/9] gain a factor 27 on tc_scf

---
 src/non_h_ints_mu/grad_squared_manu.irp.f | 54 ++++++++++++++++++++++-
 src/non_h_ints_mu/total_tc_int.irp.f      | 47 +++++++++++++++-----
 src/tc_keywords/EZFIO.cfg                 |  6 +++
 src/tc_scf/test_int.irp.f                 | 17 +++++--
 4 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index c4189535..ada174e5 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -29,7 +29,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
         do j = 1, ao_num
           do l = 1, ao_num
             ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
-            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12(l,j,ipoint)
+            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12_test(l,j,ipoint)
           enddo
         enddo
       enddo
@@ -132,3 +132,55 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 979296d1..bdd5e5ac 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -8,16 +8,20 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao
   double precision :: wall1, wall0
 
   call wall_time(wall0)
-
-  do j = 1, ao_num
-    do l = 1, ao_num
-      do i = 1, ao_num
-        do k = 1, ao_num
-          ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
-        enddo
-      enddo
-    enddo
-  enddo
+  
+  if(test_cycle_tc)then
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
 
   call wall_time(wall1)
   print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
@@ -26,6 +30,29 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  call wall_time(wall0)
+
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
 
   BEGIN_DOC
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index a206dfa9..0cbdb753 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -166,3 +166,9 @@ doc: Thresholds on the Imag part of energy
 interface: ezfio,provider,ocaml
 default: 1.e-7
 
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: False
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 545dec48..1947bb92 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -9,10 +9,10 @@ program test_ints
   print *, 'starting ...'
 
   my_grid_becke  = .True.
-  my_n_pt_r_grid = 10
-!  my_n_pt_a_grid = 50
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
 !  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 14 ! small grid for quick debug
+!  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
   my_n_pt_r_extra_grid = 30
@@ -38,10 +38,19 @@ program test_ints
 ! call routine_int2_grad1u2_grad2u2_j1b2
 ! call routine_int2_u_grad1u_j1b2
 ! call test_total_grad_lapl
- call test_total_grad_square
+! call test_total_grad_square
+ call test_ao_tc_int_chemist
 
 end
 
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
 subroutine routine_test_j1b
  implicit none
  integer :: i,icount,j

From 23ec3ba18af62b13e97c0fecc4e7b06a30a208e1 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 12 Dec 2022 16:49:31 +0100
Subject: [PATCH 3/9] trying to prune grid points per AO couples

---
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    | 47 +++++++++
 .../grad_lapl_jmu_manu.irp.f                  | 99 ++++++++++++++++++-
 src/ao_many_one_e_ints/list_grid.irp.f        | 59 +++++++++++
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  2 +-
 .../prim_int_gauss_gauss.irp.f                | 68 ++++++++++++-
 src/ao_tc_eff_map/fit_j.irp.f                 | 25 +++++
 src/bi_ort_ints/semi_num_ints_mo.irp.f        | 30 ++++--
 src/dft_utils_in_r/ao_in_r.irp.f              | 41 ++++++++
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      | 32 +++---
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  2 +-
 src/tc_scf/test_int.irp.f                     | 53 +++++++++-
 11 files changed, 424 insertions(+), 34 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f

diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..ad215b41 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -156,6 +156,53 @@ end function overlap_gauss_r12_ao
 
 ! --
 
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
 subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
 
   BEGIN_DOC
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 382f6351..c7a171f8 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -94,9 +94,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
       do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint)
       enddo
     enddo
   enddo
@@ -285,3 +285,96 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..ccdc33ad
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,dist,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, thr,fact_gauss, alpha, center,dist,sigma,center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl)
+ !$OMP DO
+ do i = 1, ao_num
+! do i = 3,3
+  do j = 1, ao_num
+! do i = 22,22
+!  do j = 9,9
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+!   if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    dist  = (center_ij(1) - r(1))*(center_ij(1) - r(1))
+    dist += (center_ij(2) - r(2))*(center_ij(2) - r(2))
+    dist += (center_ij(3) - r(3))*(center_ij(3) - r(3))
+    dist = dsqrt(dist)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+!    print*,''
+!    print*,j,i,ao_overlap_abs_grid(j,i),ao_overlap_abs(j,i)
+!    print*,r
+!    print*,dist,sigma
+!    print*,fact_gauss
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+!    print*,overlap
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list) 
+END_PROVIDER 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 934ccab1..9481d363 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -116,7 +116,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_thr_b3_size(j,i) += 1
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..dcd1db66 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -26,14 +26,16 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   dim1=100
   thr = 1.d-10
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
   call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
-    coefx = A_new(lx,1)
+    coefx = A_new(lx,1)*fact_a_new
     if(dabs(coefx).lt.thr)cycle
     iorder_tmp(1) = lx
     do ly = 0, iorder_a_new(2)
@@ -51,7 +53,69 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
       enddo
     enddo
   enddo
-  overlap_gauss_r12 = fact_a_new * accu
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
+      enddo
+    enddo
+  enddo
+  overlap_abs_gauss_r12= accu
 end
 
 !---
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index 8fad9079..d861054e 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -1,5 +1,30 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
 ! ---
 
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent of Gaussian in order to obtain an upper bound of J(r12,mu)
+ !
+ ! Can be used to scree integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
  implicit none
  BEGIN_DOC
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 4762c25e..746593dc 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -108,15 +108,27 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   double precision :: wall0, wall1
 
   call wall_time(wall0)
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(1,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(2,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(3,j,i,ipoint)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
+       enddo
+     enddo
+   enddo
+  endif
   call wall_time(wall1)
   print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
 
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 6fa6a4c7..72f820ec 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -40,6 +40,47 @@
  END_PROVIDER
 
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) 
+  enddo
+ enddo
+
+ END_PROVIDER
+
+
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
index 1af34d74..9393668f 100644
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -38,15 +38,15 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
    enddo
   enddo
  enddo
- do i = 1, ao_num
-  do j = 1, ao_num
-   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-    do m = 1, 3
-     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-    enddo
-   endif
-  enddo
- enddo
+! do i = 1, ao_num
+!  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+!    do m = 1, 3
+!     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+!    enddo
+!   endif
+!  enddo
+! enddo
 
 END_PROVIDER 
 
@@ -76,13 +76,13 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
   enddo
  enddo
 
- do i = 1, ao_num
-  do j = 1, ao_num
-   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-   endif
-  enddo
- enddo
+! do i = 1, ao_num
+!  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+!     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+!   endif
+!  enddo
+! enddo
 
 END_PROVIDER 
 
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index ada174e5..9b4cbfbd 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -116,7 +116,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao
     do j = 1, ao_num
       do i = 1, ao_num
 
-        tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
 
         u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) &
                                                      + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) &
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 1947bb92..f3a396be 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -14,7 +14,7 @@ program test_ints
 !  my_n_pt_r_grid = 10 ! small grid for quick debug
 !  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-
+  my_extra_grid_becke = .True.
   my_n_pt_r_extra_grid = 30
   my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
   touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
@@ -39,7 +39,8 @@ program test_ints
 ! call routine_int2_u_grad1u_j1b2
 ! call test_total_grad_lapl
 ! call test_total_grad_square
- call test_ao_tc_int_chemist
+! call test_ao_tc_int_chemist
+ call test_grid_points_ao
 
 end
 
@@ -584,3 +585,51 @@ subroutine test_total_grad_square
 
 
 end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(1,j,i,ipoint)) & 
+! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) &
+! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end

From a34653b5d19d05407eac1459c540c2625085260e Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 13 Dec 2022 18:40:37 +0100
Subject: [PATCH 4/9] improved the building of tc_grad_and_lapl_ao_test and
 #tc_grad_square_ao_test

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 63 +++++++--------
 .../grad_lapl_jmu_manu.irp.f                  |  8 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   | 12 +--
 src/non_h_ints_mu/grad_squared_manu.irp.f     | 27 ++++---
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      | 46 ++++++-----
 src/tc_scf/test_int.irp.f                     | 78 ++++++++++++-------
 6 files changed, 138 insertions(+), 96 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 7367df88..6f7b29d2 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -1,5 +1,5 @@
 
-BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
 
   BEGIN_DOC
   !
@@ -16,13 +16,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
 
   double precision, allocatable :: int_fit_v(:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),int_j1b,int_gauss,dsqpi_3_2
+  double precision :: int_gauss,dsqpi_3_2,int_j1b
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
   call wall_time(wall0)
 
- int2_grad1u2_grad2u2_j1b2_test_no_v(:,:,:) = 0.d0
+ int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
@@ -31,7 +32,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test_no_v, ao_abs_comb_b3_j1b,&
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
      !$OMP          ao_overlap_abs,dsqpi_3_2)
  !$OMP DO SCHEDULE(dynamic)
  do ipoint = 1, n_points_final_grid
@@ -49,7 +50,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -57,15 +57,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          do i_fit = 1, ng_fit_jast
   
            expo_fit = expo_gauss_1_erf_x_2(i_fit)
-!           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
-!           if(dabs(coef_fit)*factor_ij_1s*dabs(int_j1b).lt.1.d-15)cycle
+           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
            int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
   
-           int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) += coef_fit * int_gauss 
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
   
          enddo
         enddo
@@ -79,17 +80,17 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
       do j = 1, i-1
-        int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint)
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_no_v', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
 !
 !  BEGIN_DOC
 !  !
@@ -104,15 +105,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
   double precision              :: tmp
   double precision              :: wall0, wall1
 
-  double precision, allocatable :: int_fit_v(:)
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
   provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
  double precision :: int_j1b
- int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
-!
+ big_array(:,:,:) = 0.d0
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
@@ -120,7 +121,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test,&
+     !$OMP          List_comb_thr_b3_cent, big_array,&
      !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
 !
  allocate(int_fit_v(n_points_final_grid))
@@ -151,7 +152,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
                expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
 
          do ipoint = 1, n_points_final_grid
-           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
          enddo
 
        enddo
@@ -162,17 +163,24 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
  !$OMP END DO
  deallocate(int_fit_v)
  !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
 
 END_PROVIDER
 
@@ -192,6 +200,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
 
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
@@ -200,7 +209,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b)          & 
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
@@ -231,6 +240,9 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
           
             expo_fit = expo_gauss_j_mu_x_2(i_fit)
             coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle
           
             ! ---
           
@@ -323,17 +335,6 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
             expo_fit = expo_gauss_j_mu_1_erf(i_fit)
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
     
-            ! ---
-    
-!            call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit)
-!            tmp_x += coef_fit * int_fit(1)
-!            tmp_y += coef_fit * int_fit(2)
-!            tmp_z += coef_fit * int_fit(3)
-!            if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
-
-          ! ---
-
-
             dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
                         + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
                         + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
@@ -347,7 +348,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index c7a171f8..13ca41f2 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -119,7 +119,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
   double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
-  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
   call wall_time(wall0)
@@ -128,7 +128,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
- !$OMP          int_j1b, tmp_x, tmp_y, tmp_z)                                                  & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)                                                  & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
@@ -157,6 +157,10 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
 
+          ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2)
+!          !DIR$ FORCEINLINE
+!          call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!          if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle 
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 9481d363..d22f1ac8 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -5,13 +5,13 @@
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  List_comb_thr_b2_size = 0
  do i = 1, ao_num
   do j = i, ao_num
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-12)cycle
+     if(dabs(coef).lt.1.d-15)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
      beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
@@ -51,7 +51,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  ao_abs_comb_b2_j1b = 10000000.d0
  do i = 1, ao_num
   do j = i, ao_num
@@ -100,7 +100,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  List_comb_thr_b3_size = 0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -146,7 +146,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  ao_abs_comb_b3_j1b = 10000000.d0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -164,7 +164,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index 9b4cbfbd..e1fc4d86 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -9,8 +9,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
 
   implicit none
   integer                       :: ipoint, i, j, k, l
-  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
   double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
+  double precision :: wall1, wall0
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+  call wall_time(wall0)
 
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
@@ -20,16 +23,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
   do ipoint = 1, n_points_final_grid
     weight1 = final_weight_at_r_vector(ipoint)
 
-    do i = 1, ao_num
-      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+    do j = 1, ao_num
+      do l = 1, ao_num
+        contrib =  u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) 
+        contrib2=grad12_j12_test(l,j,ipoint)
+        do i = 1, ao_num
+          ao_i_r = weight1 * aos_in_r_array(i,ipoint)
+        
+          do k = 1, ao_num
+            ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
 
-      do k = 1, ao_num
-        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
-
-        do j = 1, ao_num
-          do l = 1, ao_num
-            ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
-            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12_test(l,j,ipoint)
+            ac_mat(k,i,l,j) += ao_ik_r * contrib
+            bc_mat(k,i,l,j) += ao_ik_r * contrib2
           enddo
         enddo
       enddo
@@ -45,6 +50,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
       enddo
     enddo
   enddo
+  call wall_time(wall1)
+  print*,'wall time for tc_grad_square_ao_test',wall1 - wall0
 
   deallocate(ac_mat)
   deallocate(bc_mat)
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
index 79d845e8..cceb0991 100644
--- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -95,34 +95,38 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
   double precision, allocatable :: ac_mat(:,:,:,:)
+  double precision :: wall0, wall1
 
+  provide int2_grad1_u12_ao_test 
+  call wall_time(wall0)
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+    do j = 1, ao_num
+      do l = 1, ao_num
+       contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint)
+       contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint)
+       contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint)
+       do i = 1, ao_num
+         ao_i_r  = weight1 * aos_in_r_array                (i,ipoint)
+         ao_i_dx = weight1 * aos_grad_in_r_array_transp(1,i,ipoint)
+         ao_i_dy = weight1 * aos_grad_in_r_array_transp(2,i,ipoint)
+         ao_i_dz = weight1 * aos_grad_in_r_array_transp(3,i,ipoint)
+       
+         do k = 1, ao_num
+           ao_k_r = aos_in_r_array(k,ipoint)
+       
+           tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp(1,k,ipoint) 
+           tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp(2,k,ipoint) 
+           tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp(3,k,ipoint) 
 
-    do i = 1, ao_num
-      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
-      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
-      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
-      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+           tmp_x *= contrib_x 
+           tmp_y *= contrib_y 
+           tmp_z *= contrib_z 
 
-      do k = 1, ao_num
-        ao_k_r = aos_in_r_array_transp(ipoint,k)
-
-        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
-        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
-        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
-
-        do j = 1, ao_num
-          do l = 1, ao_num
-
-            contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint) * tmp_x 
-            contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint) * tmp_y 
-            contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint) * tmp_z 
-
-            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+            ac_mat(k,i,l,j) += tmp_x + tmp_y + tmp_z
           enddo
         enddo
       enddo
@@ -139,6 +143,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_
     enddo
   enddo
 
+  call wall_time(wall1)
+  print*,'wall time for tc_grad_and_lapl_ao_test',wall1 - wall0
   deallocate(ac_mat)
 
 END_PROVIDER 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index f3a396be..d0217423 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -11,8 +11,8 @@ program test_ints
   my_grid_becke  = .True.
   my_n_pt_r_grid = 30
   my_n_pt_a_grid = 50
-!  my_n_pt_r_grid = 10 ! small grid for quick debug
-!  my_n_pt_a_grid = 14 ! small grid for quick debug
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
   my_extra_grid_becke = .True.
   my_n_pt_r_extra_grid = 30
@@ -40,10 +40,21 @@ program test_ints
 ! call test_total_grad_lapl
 ! call test_total_grad_square
 ! call test_ao_tc_int_chemist
- call test_grid_points_ao
+! call test_grid_points_ao
+ call test_tc_scf
 
 end
 
+subroutine test_tc_scf
+ implicit none
+! provide tc_grad_square_ao_test
+  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
 subroutine test_ao_tc_int_chemist
  implicit none
  provide ao_tc_int_chemist
@@ -309,17 +320,18 @@ end
 subroutine routine_int2_grad1u2_grad2u2_j1b2
  implicit none
  integer :: i,j,ipoint,k,l
+ integer :: ii , jj
  double precision :: weight,accu_relat, accu_abs, contrib
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
  double precision, allocatable :: ints(:,:,:)
  allocate(ints(ao_num, ao_num, n_points_final_grid))
-! do ipoint = 1, n_points_final_grid
-!  do i = 1, ao_num
-!   do j = 1, ao_num
-!    read(33,*)ints(j,i,ipoint)
-!   enddo
-!  enddo
-! enddo
+ do ipoint = 1, n_points_final_grid
+  do i = 1, ao_num
+   do j = 1, ao_num
+    read(33,*)ints(j,i,ipoint)
+   enddo
+  enddo
+ enddo
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -331,17 +343,17 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
    do l = 1, ao_num
     do i = 1, ao_num
      do j = 1, ao_num
-      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!     !array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
-!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
 !        print*,j,i,ipoint
-!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
-!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
 !        stop
 !       endif
 !      endif
@@ -350,23 +362,35 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
    enddo
   enddo
  enddo
+ double precision :: e_ref, e_new
  accu_relat = 0.d0
  accu_abs   = 0.d0
-  do k = 1, ao_num
-   do l = 1, ao_num
-    do i = 1, ao_num
-     do j = 1, ao_num
-      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
-      accu_abs += contrib
-      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
-       accu_relat += contrib/dabs(array_ref(j,i,l,k))
-      endif
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
      enddo
     enddo
    enddo
+
   enddo
- print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
- print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
 
   
 

From 0b7c5fcb977592c6807eb80fe2cddcdd852e40db Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 14 Dec 2022 18:47:32 +0100
Subject: [PATCH 5/9] trying to optimize the most intensive part

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 31 ++++---
 .../grad_lapl_jmu_manu.irp.f                  | 21 +++--
 src/ao_tc_eff_map/fit_j.irp.f                 | 10 +++
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      | 83 ++++++++++++++-----
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  3 +-
 src/tc_scf/test_int.irp.f                     | 49 ++++++++++-
 6 files changed, 151 insertions(+), 46 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 6f7b29d2..d5210aa7 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -17,8 +17,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
   double precision, allocatable :: int_fit_v(:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
   double precision :: int_gauss,dsqpi_3_2,int_j1b
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
   call wall_time(wall0)
@@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
      !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
-     !$OMP          ao_overlap_abs,dsqpi_3_2)
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
  !$OMP DO SCHEDULE(dynamic)
  do ipoint = 1, n_points_final_grid
    r(1) = final_grid_points(1,ipoint)
@@ -60,7 +60,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
            !DIR$ FORCEINLINE
            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
-           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
@@ -200,7 +201,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
@@ -213,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
  !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
  !$OMP DO
   do ipoint = 1, n_points_final_grid
@@ -242,7 +244,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
             coef_fit = coef_gauss_j_mu_x_2(i_fit)
             !DIR$ FORCEINLINE
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
           
             ! ---
           
@@ -291,8 +294,8 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
   double precision :: coef, beta, B_center(3), dist
   double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
   double precision :: tmp_x, tmp_y, tmp_z, int_j1b
-  double precision :: wall0, wall1
-
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+  sq_pi_3_2 = dacos(-1.D0)**(3/2)
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
 
@@ -302,12 +305,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
  !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
  !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z,int_j1b)                        & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
  !$OMP DO
 
   do ipoint = 1, n_points_final_grid
@@ -348,7 +351,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -450,7 +455,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
 
             expo_fit = expo_gauss_j_mu_1_erf(i_fit)
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
 
             alpha_1s     = beta + expo_fit
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 13ca41f2..f71a66e6 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -50,7 +50,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
-
+          ! TODO :: cycle on the 1 - erf(mur12)
           int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
           int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
 
@@ -122,6 +122,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
   double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
   call wall_time(wall0)
 
   x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0
@@ -132,9 +133,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
- !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
  !$OMP DO
-  !do ipoint = 1, 10
   do ipoint = 1, n_points_final_grid
     r(1) = final_grid_points(1,ipoint)
     r(2) = final_grid_points(2,ipoint)
@@ -142,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
 
         tmp_x = 0.d0
         tmp_y = 0.d0
@@ -157,10 +158,14 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
 
-          ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2)
-!          !DIR$ FORCEINLINE
-!          call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!          if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle 
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-3/2)).lt.1.d-10)cycle 
+!          endif
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index d861054e..902d4514 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -13,6 +13,16 @@
 END_PROVIDER 
 ! ---
 
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
  BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
 &BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
index 9393668f..39ea0cdf 100644
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -1,5 +1,28 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
  implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
  integer :: i,j,ipoint
  double precision :: contrib, weight,r(3)
  ao_overlap_abs_grid = 0.D0
@@ -21,7 +44,7 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
  BEGIN_DOC
 ! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
 !
-! if \int |phi_i(r) phi_j(r)| < 1.d-15 then ao_prod_center = 0.
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
  END_DOC
  integer :: i,j,m,ipoint
  double precision :: contrib, weight,r(3)
@@ -38,26 +61,29 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
    enddo
   enddo
  enddo
-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!    do m = 1, 3
-!     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!    enddo
-!   endif
-!  enddo
-! enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
 
 END_PROVIDER 
 
-BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
  implicit none
  BEGIN_DOC
-! ao_prod_sigma(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
 !
-! gives you a precise idea of the spatial extension of the distribution phi_i(r) phi_j(r)
  END_DOC
- ao_prod_sigma = 0.d0
+ ao_prod_abs_r = 0.d0
  integer :: i,j,m,ipoint
  double precision :: contrib, weight,r(3),contrib_x2
  do ipoint = 1,n_points_final_grid 
@@ -71,21 +97,34 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
      contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
     enddo
     contrib_x2 = dsqrt(contrib_x2)
-    ao_prod_sigma(j,i) += contrib * contrib_x2
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
    enddo
   enddo
  enddo
 
-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!   endif
-!  enddo
-! enddo
 
 END_PROVIDER 
 
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
  implicit none
  BEGIN_DOC
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index e1fc4d86..14749082 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -99,6 +99,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao
   double precision           :: time0, time1
   double precision, external :: overlap_gauss_r12_ao
 
+  provide int2_u_grad1u_x_j1b2_test
   print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
   call wall_time(time0)
 
@@ -147,7 +148,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi
   double precision           :: tmp1
   double precision           :: time0, time1
   double precision, external :: overlap_gauss_r12_ao
-
+  provide int2_grad1u2_grad2u2_j1b2_test
   print*, ' providing grad12_j12_test ...'
   call wall_time(time0)
 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index d0217423..a81b09d5 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -41,14 +41,21 @@ program test_ints
 ! call test_total_grad_square
 ! call test_ao_tc_int_chemist
 ! call test_grid_points_ao
- call test_tc_scf
+! call test_tc_scf
+ call test_int_gauss
 
 end
 
 subroutine test_tc_scf
  implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
 ! provide tc_grad_square_ao_test
-  provide tc_grad_and_lapl_ao_test
+!  provide tc_grad_and_lapl_ao_test
 ! provide int2_u_grad1u_x_j1b2_test
 ! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
 ! print*,'TC_HF_energy = ',TC_HF_energy
@@ -657,3 +664,41 @@ subroutine test_grid_points_ao
   enddo
  enddo
 end
+
+subroutine test_int_gauss
+ implicit none
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2
+
+
+end

From 3f0326e6c2ec666e5d6ed593531fc4406343c23a Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 16 Dec 2022 00:05:51 +0100
Subject: [PATCH 6/9] fixed stupid bug in
 ao_many_one_e_ints/listj1b_sorted.irp.f

---
 src/ao_many_one_e_ints/listj1b_sorted.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index d22f1ac8..bf493fbb 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -22,7 +22,7 @@
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_thr_b2_size(j,i) += 1
@@ -68,7 +68,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1

From a3bc5fd421f6225fadea961bc8452b00c3769967 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 20 Dec 2022 00:07:52 +0100
Subject: [PATCH 7/9] added the computation of 3-e term for energy in
 open-shell tc_scf

---
 src/tc_bi_ortho/test_tc_fock.irp.f | 82 ++++++++++++++++++++++++++++--
 src/tc_scf/fock_tc.irp.f           |  4 +-
 2 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index 26446daf..ebd43a7a 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -15,7 +15,8 @@ program test_tc_fock
   !call routine_2
 !  call routine_3()
 
- call test_3e
+! call test_3e
+ call routine_tot
 end
 
 ! ---
@@ -84,8 +85,8 @@ subroutine routine_3()
         print*, i, a
         stop
        endif
-       !print*, ' excited det'
-       !call debug_det(det_i, N_int)
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
  
        call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
@@ -116,3 +117,78 @@ subroutine routine_3()
 end subroutine routine_3
 
 ! ---
+subroutine routine_tot()
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+ 
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index c3642a7e..2a08e469 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -89,7 +89,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
 
     call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                           , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-    if(three_body_h_tc) then
+    if(three_body_h_tc.and.elec_alpha_num == elec_beta_num) then
       Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
     endif
 
@@ -116,7 +116,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
 
-    if(three_body_h_tc) then
+    if(three_body_h_tc.and.elec_alpha_num == elec_beta_num) then
       Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
     endif
 

From f7e58e4a636af0ab066aa644a74ab56cb4de6041 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 21 Dec 2022 13:54:20 +0100
Subject: [PATCH 8/9] added print_hmat and sparse_mat

---
 src/determinants/filter_connected.irp.f |   1 -
 src/determinants/sparse_mat.irp.f       | 164 ++++++++++++++++++++++++
 src/tc_scf/fock_tc_mo_tot.irp.f         |  23 ++++
 src/tc_scf/fock_three.irp.f             |   2 +
 src/tools/print_hmat.irp.f              |  90 +++++++++++++
 5 files changed, 279 insertions(+), 1 deletion(-)
 create mode 100644 src/determinants/sparse_mat.irp.f
 create mode 100644 src/tools/print_hmat.irp.f

diff --git a/src/determinants/filter_connected.irp.f b/src/determinants/filter_connected.irp.f
index 6110eb89..2c9d7a49 100644
--- a/src/determinants/filter_connected.irp.f
+++ b/src/determinants/filter_connected.irp.f
@@ -96,7 +96,6 @@ subroutine filter_not_connected(key1,key2,Nint,sze,idx)
   idx(0) = l-1
 end
 
-
 subroutine filter_connected(key1,key2,Nint,sze,idx)
   use bitmasks
   implicit none
diff --git a/src/determinants/sparse_mat.irp.f b/src/determinants/sparse_mat.irp.f
new file mode 100644
index 00000000..889bbeba
--- /dev/null
+++ b/src/determinants/sparse_mat.irp.f
@@ -0,0 +1,164 @@
+  use bitmasks
+
+subroutine filter_connected_array(key1,key2,ld,Nint,sze,idx)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Filters out the determinants that are not connected by H
+  !
+  ! returns the array idx which contains the index of the
+  !
+  ! determinants in the array key1 that interact
+  !
+  ! via the H operator with key2.
+  !
+  ! idx(0) is the number of determinants that interact with key1
+  END_DOC
+  integer, intent(in)            :: Nint, ld,sze
+  integer(bit_kind), intent(in)  :: key1(Nint,2,ld)
+  integer(bit_kind), intent(in)  :: key2(Nint,2)
+  integer, intent(out)           :: idx(0:sze)
+
+  integer                        :: i,j,l
+  integer                        :: degree_x2
+
+  ASSERT (Nint > 0)
+  ASSERT (sze >= 0)
+
+  l=1
+
+  if (Nint==1) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = popcnt(    xor( key1(1,1,i), key2(1,1))) &
+                + popcnt(    xor( key1(1,2,i), key2(1,2)))
+!      print*,degree_x2
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else if (Nint==2) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 =  popcnt(xor( key1(1,1,i), key2(1,1))) +            &
+          popcnt(xor( key1(2,1,i), key2(2,1))) +                     &
+          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
+          popcnt(xor( key1(2,2,i), key2(2,2)))
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else if (Nint==3) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) +             &
+          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
+          popcnt(xor( key1(2,1,i), key2(2,1))) +                     &
+          popcnt(xor( key1(2,2,i), key2(2,2))) +                     &
+          popcnt(xor( key1(3,1,i), key2(3,1))) +                     &
+          popcnt(xor( key1(3,2,i), key2(3,2)))
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = 0
+      !DIR$ LOOP COUNT MIN(4)
+      do j=1,Nint
+        degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
+            popcnt(xor( key1(j,2,i), key2(j,2)))
+        if (degree_x2 > 4) then
+          exit
+        endif
+      enddo
+      if (degree_x2 <= 5) then
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  endif
+  idx(0) = l-1
+!  print*,'idx(0) = ',idx(0)
+end
+
+
+ BEGIN_PROVIDER [ integer, n_sparse_mat]
+&BEGIN_PROVIDER [ integer, n_connected_per_det, (N_det)]
+&BEGIN_PROVIDER [ integer, n_max_connected_per_det]
+ implicit none
+ BEGIN_DOC
+! n_sparse_mat = total number of connections in the CI matrix 
+!
+! n_connected_per_det(i) = number of connected determinants to the determinant psi_det(1,1,i)
+!
+! n_max_connected_per_det = maximum number of connected determinants 
+ END_DOC
+ integer, allocatable :: idx(:)
+ allocate(idx(0:N_det))
+ integer :: i
+ n_sparse_mat = 0
+ do i = 1, N_det
+  call filter_connected_array(psi_det_sorted,psi_det_sorted(1,1,i),psi_det_size,N_int,N_det,idx)
+  n_connected_per_det(i) = idx(0)
+  n_sparse_mat += idx(0)
+ enddo
+ n_max_connected_per_det = maxval(n_connected_per_det)
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), connected_det_per_det, (N_int,2,n_max_connected_per_det,N_det)]
+&BEGIN_PROVIDER [ integer(bit_kind), list_connected_det_per_det, (n_max_connected_per_det,N_det)]
+ implicit none
+ BEGIN_DOC
+! connected_det_per_det(:,:,j,i) = jth connected determinant to the determinant psi_det(:,:,i)
+!
+! list_connected_det_per_det(j,i) = index of jth determinant in psi_det which is connected to psi_det(:,:,i)
+ END_DOC
+ integer, allocatable :: idx(:)
+ allocate(idx(0:N_det))
+ integer :: i,j
+ do i = 1, N_det
+  call filter_connected_array(psi_det_sorted,psi_det_sorted(1,1,i),psi_det_size,N_int,N_det,idx)
+  do j = 1, idx(0)
+   connected_det_per_det(1:N_int,1:2,j,i) = psi_det_sorted(1:N_int,1:2,idx(j))
+   list_connected_det_per_det(j,i) = idx(j)
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, sparse_h_mat, (n_max_connected_per_det, N_det)]
+ implicit none
+ BEGIN_DOC
+! sparse matrix format 
+!
+! sparse_h_mat(j,i) = matrix element between the jth connected determinant and psi_det(:,:,i)
+ END_DOC
+ integer :: i,j 
+ double precision :: hij
+ do i = 1, N_det
+  do j = 1, n_connected_per_det(i)
+   call i_H_j(psi_det(1,1,i),connected_det_per_det(1,1,j,i),N_int,hij)
+   sparse_h_mat(j,i) = hij
+  enddo
+ enddo
+
+END_PROVIDER 
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
index a99c7698..2f33cd17 100644
--- a/src/tc_scf/fock_tc_mo_tot.irp.f
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -73,6 +73,29 @@
              + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
        enddo
      enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
 
    endif
 
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index 35b6aac6..3901f707 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -128,6 +128,8 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf]
       call give_abb_contrib(integral_abb)
       call give_bbb_contrib(integral_bbb)
       diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
 
     endif
 
diff --git a/src/tools/print_hmat.irp.f b/src/tools/print_hmat.irp.f
new file mode 100644
index 00000000..48001e44
--- /dev/null
+++ b/src/tools/print_hmat.irp.f
@@ -0,0 +1,90 @@
+program print_h_mat
+  implicit none
+  BEGIN_DOC
+  ! program that prints out the CI matrix in sparse form 
+  END_DOC
+  read_wf = .True. 
+  touch read_wf 
+  call print_wf_dets
+  call print_wf_coef
+  call sparse_mat
+  call full_mat
+  call test_sparse_mat
+end
+
+subroutine print_wf_dets
+ implicit none
+ integer :: i,j
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.wf_det'
+ i_unit_output = getUnitAndOpen(output,'w')
+ write(i_unit_output,*)N_det,N_int
+ do i = 1, N_det
+  write(i_unit_output,*)psi_det_sorted(1:N_int,1,i)
+  write(i_unit_output,*)psi_det_sorted(1:N_int,2,i)
+ enddo
+end
+
+subroutine print_wf_coef
+ implicit none
+ integer :: i,j
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.wf_coef'
+ i_unit_output = getUnitAndOpen(output,'w')
+ write(i_unit_output,*)N_det,N_states
+ do i = 1, N_det
+  write(i_unit_output,*)psi_coef_sorted(i,1:N_states)
+ enddo
+end
+
+subroutine sparse_mat
+ implicit none
+ integer :: i,j
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.hmat_sparse'
+ i_unit_output = getUnitAndOpen(output,'w')
+ do i = 1, N_det
+  write(i_unit_output,*)i,n_connected_per_det(i)
+  do j =1, n_connected_per_det(i)
+   write(i_unit_output,*)list_connected_det_per_det(j,i),sparse_h_mat(j,i)
+  enddo
+ enddo
+end
+
+
+subroutine full_mat
+ implicit none
+ integer :: i,j
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ output=trim(ezfio_filename)//'.hmat_full'
+ i_unit_output = getUnitAndOpen(output,'w')
+ do i = 1, N_det
+  do j = i, N_det
+   write(i_unit_output,*)i,j,H_matrix_all_dets(j,i)
+  enddo
+ enddo
+end
+
+
+subroutine test_sparse_mat
+ implicit none
+ integer :: i,j
+ double precision, allocatable :: eigvec(:,:), eigval(:), hmat(:,:)
+ allocate(eigval(N_det), eigvec(N_det,N_det),hmat(N_det,N_det))
+ hmat = 0.d0
+ do i = 1, N_det
+  do j =1, n_connected_per_det(i)
+   hmat(list_connected_det_per_det(j,i),i) = sparse_h_mat(j,i)
+  enddo
+ enddo
+ call lapack_diag(eigval,eigvec,hmat,N_det,N_det) 
+ print*,'The two energies should be the same '
+ print*,'eigval(1) = ',eigval(1)
+ print*,'psi_energy= ',CI_electronic_energy(1)
+
+
+end

From eee1fe86733bb34628d5243f3fd7c9f2e98ca76a Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 22 Dec 2022 14:42:38 +0100
Subject: [PATCH 9/9] prep for PR

---
 src/bi_ort_ints/semi_num_ints_mo.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 89098676..a548ee62 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -178,8 +178,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3,
   integer :: i, j, ipoint
 
   do ipoint = 1, n_points_final_grid
-    do i = 1, mo_num
-      do j = 1, mo_num
+    do i = 1, ao_num
+      do j = 1, ao_num
         int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(1,j,i,ipoint)
         int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(2,j,i,ipoint)
         int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(3,j,i,ipoint)