diff --git a/src/ao_basis/aos_in_r.irp.f b/src/ao_basis/aos_in_r.irp.f index 902827eb..7fcb980a 100644 --- a/src/ao_basis/aos_in_r.irp.f +++ b/src/ao_basis/aos_in_r.irp.f @@ -12,21 +12,21 @@ double precision function ao_value(i,r) integer :: power_ao(3) double precision :: accu,dx,dy,dz,r2 num_ao = ao_nucl(i) -! power_ao(1:3)= ao_power(i,1:3) -! center_ao(1:3) = nucl_coord(num_ao,1:3) -! dx = (r(1) - center_ao(1)) -! dy = (r(2) - center_ao(2)) -! dz = (r(3) - center_ao(3)) -! r2 = dx*dx + dy*dy + dz*dz -! dx = dx**power_ao(1) -! dy = dy**power_ao(2) -! dz = dz**power_ao(3) + power_ao(1:3)= ao_power(i,1:3) + center_ao(1:3) = nucl_coord(num_ao,1:3) + dx = (r(1) - center_ao(1)) + dy = (r(2) - center_ao(2)) + dz = (r(3) - center_ao(3)) + r2 = dx*dx + dy*dy + dz*dz + dx = dx**power_ao(1) + dy = dy**power_ao(2) + dz = dz**power_ao(3) accu = 0.d0 -! do m=1,ao_prim_num(i) -! beta = ao_expo_ordered_transp(m,i) -! accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) -! enddo + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) + accu += ao_coef_normalized_ordered_transp(m,i) * dexp(-beta*r2) + enddo ao_value = accu * dx * dy * dz end diff --git a/src/ao_many_one_e_ints/NEED b/src/ao_many_one_e_ints/NEED index 0d08442c..c57219cd 100644 --- a/src/ao_many_one_e_ints/NEED +++ b/src/ao_many_one_e_ints/NEED @@ -3,3 +3,4 @@ ao_two_e_ints becke_numerical_grid mo_one_e_ints dft_utils_in_r +tc_keywords diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index 8196614f..7c68de75 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -1,4 +1,72 @@ + +! --- + +BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) [1 - erf(mu r12)]^2 + ! 
+ END_DOC + + implicit none + integer :: i, j, ipoint, i_fit + double precision :: r(3), expo_fit, coef_fit + double precision :: tmp + double precision :: wall0, wall1 + + double precision, external :: overlap_gauss_r12_ao + + print*, ' providing int2_grad1u2_grad2u2 ...' + call wall_time(wall0) + + provide mu_erf final_grid_points j1b_pen + + int2_grad1u2_grad2u2 = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_fit, r, coef_fit, expo_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,int2_grad1u2_grad2u2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + + do i = 1, ao_num + do j = i, ao_num + + tmp = 0.d0 + do i_fit = 1, ng_fit_jast + + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) + + tmp += -0.25d0 * coef_fit * overlap_gauss_r12_ao(r, expo_fit, i, j) + enddo + + int2_grad1u2_grad2u2(j,i,ipoint) = tmp + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + do ipoint = 1, n_points_final_grid + do i = 2, ao_num + do j = 1, i-1 + int2_grad1u2_grad2u2(j,i,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + + call wall_time(wall1) + print*, ' wall time for int2_grad1u2_grad2u2 =', wall1 - wall0 + +END_PROVIDER + ! 
--- BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] @@ -26,15 +94,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n int2_grad1u2_grad2u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) + !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -53,7 +121,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += -0.25d0 * coef_fit * int_fit -! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle +! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle ! --- @@ -78,8 +146,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -96,7 +164,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +BEGIN_PROVIDER [double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] BEGIN_DOC ! 
@@ -120,15 +188,15 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final int2_u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + !$OMP DO do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) r(2) = final_grid_points(2,ipoint) @@ -147,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) tmp += coef_fit * int_fit -! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle +! if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle ! 
--- @@ -172,8 +240,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index fc30cd83..25bb2f8b 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -24,12 +24,12 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po v_ij_erf_rk_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, & - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & - !$OMP v_ij_erf_rk_cst_mu_j1b, mu_erf) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & + !$OMP v_ij_erf_rk_cst_mu_j1b, mu_erf) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po int_mu = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) -! if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle +! if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 
1d-12) cycle tmp += coef * (int_mu - int_coulomb) @@ -77,8 +77,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -112,13 +112,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ x_v_ij_erf_rk_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,& - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & - !$OMP x_v_ij_erf_rk_cst_mu_j1b, mu_erf) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,& + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent, & + !$OMP x_v_ij_erf_rk_cst_mu_j1b, mu_erf) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) -! if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle +! if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 
3d-10) cycle tmp_x += coef * (ints(1) - ints_coulomb(1)) tmp_y += coef * (ints(2) - ints_coulomb(2)) @@ -175,8 +175,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_ enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -220,15 +220,15 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ v_ij_u_cst_mu_j1b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & - !$OMP coef_fit, expo_fit, int_fit, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & - !$OMP final_grid_points, ng_fit_jast, & - !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & - !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & - !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) - !$OMP DO + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & + !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & + !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) + !$OMP DO !do ipoint = 1, 10 do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -253,7 +253,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ B_center(3) = List_all_comb_b2_cent(3,1) int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) -! if(dabs(int_fit*coef) .lt. 1d-12) cycle +! if(dabs(int_fit*coef) .lt. 
1d-12) cycle tmp += coef * coef_fit * int_fit @@ -280,8 +280,8 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 4698cb27..02963605 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -1,17 +1,34 @@ ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b2_size] +BEGIN_PROVIDER [integer, List_all_comb_b2_size] implicit none - List_all_comb_b2_size = 2**nucl_num + PROVIDE j1b_type + + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + List_all_comb_b2_size = 2**nucl_num + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + List_all_comb_b2_size = nucl_num + 1 + + else + + print *, 'j1b_type = ', j1b_type, 'is not implemented' + stop + + endif + + print *, ' nb of linear terms in the envelope is ', List_all_comb_b2_size END_PROVIDER ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)] +BEGIN_PROVIDER [integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)] implicit none integer :: i, j @@ -50,57 +67,79 @@ END_PROVIDER List_all_comb_b2_expo = 0.d0 List_all_comb_b2_cent = 0.d0 - do i = 1, List_all_comb_b2_size + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - tmp_cent_x = 0.d0 - tmp_cent_y = 0.d0 - tmp_cent_z = 0.d0 - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - List_all_comb_b2_expo(i) += tmp_alphaj - tmp_cent_x += tmp_alphaj * nucl_coord(j,1) - tmp_cent_y += tmp_alphaj * nucl_coord(j,2) - tmp_cent_z += tmp_alphaj * nucl_coord(j,3) - enddo + do i = 1, List_all_comb_b2_size - if(List_all_comb_b2_expo(i) .lt. 
1d-10) cycle - - List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) - List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i) - List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i) - enddo - - ! --- - - do i = 1, List_all_comb_b2_size - - do j = 2, nucl_num, 1 - tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) - do k = 1, j-1, 1 - tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k) - - List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & - + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & - + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + tmp_cent_x = 0.d0 + tmp_cent_y = 0.d0 + tmp_cent_z = 0.d0 + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + List_all_comb_b2_expo(i) += tmp_alphaj + tmp_cent_x += tmp_alphaj * nucl_coord(j,1) + tmp_cent_y += tmp_alphaj * nucl_coord(j,2) + tmp_cent_z += tmp_alphaj * nucl_coord(j,3) enddo + + if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + + List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) + List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i) + List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i) enddo - if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + ! --- - List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i) - enddo + do i = 1, List_all_comb_b2_size - ! 
--- + do j = 2, nucl_num, 1 + tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + do k = 1, j-1, 1 + tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k) - do i = 1, List_all_comb_b2_size + List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & + + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & + + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + enddo + enddo - phase = 0 - do j = 1, nucl_num - phase += List_all_comb_b2(j,i) + if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle + + List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i) enddo - List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) - enddo + ! --- + + do i = 1, List_all_comb_b2_size + + phase = 0 + do j = 1, nucl_num + phase += List_all_comb_b2(j,i) + enddo + + List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i)) + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + List_all_comb_b2_coef( 1) = 1.d0 + List_all_comb_b2_expo( 1) = 0.d0 + List_all_comb_b2_cent(1:3,1) = 0.d0 + do i = 1, nucl_num + List_all_comb_b2_coef( i+1) = -1.d0 + List_all_comb_b2_expo( i+1) = j1b_pen( i) + List_all_comb_b2_cent(1,i+1) = nucl_coord(i,1) + List_all_comb_b2_cent(2,i+1) = nucl_coord(i,2) + List_all_comb_b2_cent(3,i+1) = nucl_coord(i,3) + enddo + + else + + print *, 'j1b_type = ', j1b_type, 'is not implemented' + stop + + endif !print *, ' coeff, expo & cent of list b2' !do i = 1, List_all_comb_b2_size @@ -115,14 +154,31 @@ END_PROVIDER BEGIN_PROVIDER [ integer, List_all_comb_b3_size] implicit none + double precision :: tmp - List_all_comb_b3_size = 3**nucl_num + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + List_all_comb_b3_size = 3**nucl_num + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 
104)) then + + tmp = 0.5d0 * dble(nucl_num) * (dble(nucl_num) + 3.d0) + List_all_comb_b3_size = int(tmp) + 1 + + else + + print *, 'j1b_type = ', j1b_type, 'is not implemented' + stop + + endif + + print *, ' nb of linear terms in the square of the envelope is ', List_all_comb_b3_size END_PROVIDER ! --- -BEGIN_PROVIDER [ integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)] +BEGIN_PROVIDER [integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)] implicit none integer :: i, j, ii, jj @@ -162,7 +218,11 @@ END_PROVIDER implicit none integer :: i, j, k, phase + integer :: ii double precision :: tmp_alphaj, tmp_alphak, facto + double precision :: tmp1, tmp2, tmp3, tmp4 + double precision :: xi, yi, zi, xj, yj, zj + double precision :: dx, dy, dz, r2 provide j1b_pen @@ -170,60 +230,127 @@ END_PROVIDER List_all_comb_b3_expo = 0.d0 List_all_comb_b3_cent = 0.d0 - do i = 1, List_all_comb_b3_size + if((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - List_all_comb_b3_expo(i) += tmp_alphaj - List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) - List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) - List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3) + do i = 1, List_all_comb_b3_size + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) + List_all_comb_b3_expo(i) += tmp_alphaj + List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) + List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) + List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3) + + enddo + + if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + ASSERT(List_all_comb_b3_expo(i) .gt. 
0d0) + + List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) + List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) + List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i) enddo - if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle - ASSERT(List_all_comb_b3_expo(i) .gt. 0d0) + ! --- - List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) - List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i) - List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i) - enddo + do i = 1, List_all_comb_b3_size - ! --- + do j = 2, nucl_num, 1 + tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) + do k = 1, j-1, 1 + tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k) - do i = 1, List_all_comb_b3_size + List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & + + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & + + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + enddo + enddo - do j = 2, nucl_num, 1 - tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - do k = 1, j-1, 1 - tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k) + if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle - List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) & - + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) & - + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) ) + List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i) + enddo + + ! 
--- + + do i = 1, List_all_comb_b3_size + + facto = 1.d0 + phase = 0 + do j = 1, nucl_num + tmp_alphaj = dble(List_all_comb_b3(j,i)) + + facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj)) + phase += List_all_comb_b3(j,i) + enddo + + List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + ii = 1 + List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_expo( ii) = 0.d0 + List_all_comb_b3_cent(1:3,ii) = 0.d0 + + do i = 1, nucl_num + ii = ii + 1 + List_all_comb_b3_coef( ii) = -2.d0 + List_all_comb_b3_expo( ii) = j1b_pen( i) + List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) + List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) + List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) + enddo + + do i = 1, nucl_num + ii = ii + 1 + List_all_comb_b3_coef( ii) = 1.d0 + List_all_comb_b3_expo( ii) = 2.d0 * j1b_pen(i) + List_all_comb_b3_cent(1,ii) = nucl_coord(i,1) + List_all_comb_b3_cent(2,ii) = nucl_coord(i,2) + List_all_comb_b3_cent(3,ii) = nucl_coord(i,3) + enddo + + do i = 1, nucl_num-1 + + tmp1 = j1b_pen(i) + + xi = nucl_coord(i,1) + yi = nucl_coord(i,2) + zi = nucl_coord(i,3) + + do j = i+1, nucl_num + + tmp2 = j1b_pen(j) + tmp3 = tmp1 + tmp2 + tmp4 = 1.d0 / tmp3 + + xj = nucl_coord(j,1) + yj = nucl_coord(j,2) + zj = nucl_coord(j,3) + + dx = xi - xj + dy = yi - yj + dz = zi - zj + r2 = dx*dx + dy*dy + dz*dz + + ii = ii + 1 + ! x 2 to avoid doing integrals twice + List_all_comb_b3_coef( ii) = 2.d0 * dexp(-tmp1*tmp2*tmp4*r2) + List_all_comb_b3_expo( ii) = tmp3 + List_all_comb_b3_cent(1,ii) = tmp4 * (tmp1 * xi + tmp2 * xj) + List_all_comb_b3_cent(2,ii) = tmp4 * (tmp1 * yi + tmp2 * yj) + List_all_comb_b3_cent(3,ii) = tmp4 * (tmp1 * zi + tmp2 * zj) enddo enddo - if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle + else - List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i) - enddo + print *, 'j1b_type = ', j1b_type, 'is not implemented' + stop - ! 
--- - - do i = 1, List_all_comb_b3_size - - facto = 1.d0 - phase = 0 - do j = 1, nucl_num - tmp_alphaj = dble(List_all_comb_b3(j,i)) - - facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj)) - phase += List_all_comb_b3(j,i) - enddo - - List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) - enddo + endif !print *, ' coeff, expo & cent of list b3' !do i = 1, List_all_comb_b3_size diff --git a/src/ao_one_e_ints/NEED b/src/ao_one_e_ints/NEED index 61d23b1e..b9caaf5d 100644 --- a/src/ao_one_e_ints/NEED +++ b/src/ao_one_e_ints/NEED @@ -1,2 +1,3 @@ ao_basis pseudo +cosgtos_ao_int diff --git a/src/ao_one_e_ints/ao_overlap.irp.f b/src/ao_one_e_ints/ao_overlap.irp.f index d9061d67..597eb32a 100644 --- a/src/ao_one_e_ints/ao_overlap.irp.f +++ b/src/ao_one_e_ints/ao_overlap.irp.f @@ -1,75 +1,99 @@ - BEGIN_PROVIDER [ double precision, ao_overlap,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_overlap_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap , (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_z, (ao_num, ao_num) ] + BEGIN_DOC -! Overlap between atomic basis functions: -! -! :math:`\int \chi_i(r) \chi_j(r) dr` + ! Overlap between atomic basis functions: + ! + ! 
:math:`\int \chi_i(r) \chi_j(r) dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_x, overlap_y, overlap_z double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) - ao_overlap = 0.d0 + + ao_overlap = 0.d0 ao_overlap_x = 0.d0 ao_overlap_y = 0.d0 ao_overlap_z = 0.d0 - if (read_ao_integrals_overlap) then - call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) - print *, 'AO overlap integrals read from disk' + + if(read_ao_integrals_overlap) then + + call ezfio_get_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) + print *, 'AO overlap integrals read from disk' + else - dim1=100 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * 
ao_coef_normalized_ordered_transp(l,i) - ao_overlap(i,j) += c * overlap - if(isnan(ao_overlap(i,j)))then - print*,'i,j',i,j - print*,'l,n',l,n - print*,'c,overlap',c,overlap - print*,overlap_x,overlap_y,overlap_z - stop - endif - ao_overlap_x(i,j) += c * overlap_x - ao_overlap_y(i,j) += c * overlap_y - ao_overlap_z(i,j) += c * overlap_z + if(use_cosgtos) then + !print*, ' use_cosgtos for ao_overlap ?', use_cosgtos + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap (i,j) = ao_overlap_cosgtos (i,j) + ao_overlap_x(i,j) = ao_overlap_cosgtos_x(i,j) + ao_overlap_y(i,j) = ao_overlap_cosgtos_y(i,j) + ao_overlap_z(i,j) = ao_overlap_cosgtos_z(i,j) + enddo + enddo + + else + + dim1=100 + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_x,overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_x,ao_overlap_y,ao_overlap_z,ao_overlap,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + ao_overlap(i,j) += c * overlap + if(isnan(ao_overlap(i,j)))then + print*,'i,j',i,j + 
print*,'l,n',l,n + print*,'c,overlap',c,overlap + print*,overlap_x,overlap_y,overlap_z + stop + endif + ao_overlap_x(i,j) += c * overlap_x + ao_overlap_y(i,j) += c * overlap_y + ao_overlap_z(i,j) += c * overlap_z + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif + endif + if (write_ao_integrals_overlap) then call ezfio_set_ao_one_e_ints_ao_integrals_overlap(ao_overlap(1:ao_num, 1:ao_num)) print *, 'AO overlap integrals written to disk' @@ -77,6 +101,8 @@ END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -85,6 +111,8 @@ BEGIN_PROVIDER [ double precision, ao_overlap_imag, (ao_num, ao_num) ] ao_overlap_imag = 0.d0 END_PROVIDER +! --- + BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] implicit none BEGIN_DOC @@ -98,41 +126,43 @@ BEGIN_PROVIDER [ complex*16, ao_overlap_complex, (ao_num, ao_num) ] enddo END_PROVIDER +! --- +BEGIN_PROVIDER [ double precision, ao_overlap_abs, (ao_num, ao_num) ] - -BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] - implicit none BEGIN_DOC -! Overlap between absolute values of atomic basis functions: -! -! :math:`\int |\chi_i(r)| |\chi_j(r)| dr` + ! Overlap between absolute values of atomic basis functions: + ! + ! 
:math:`\int |\chi_i(r)| |\chi_j(r)| dr` END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 - double precision :: overlap, overlap_x, overlap_y, overlap_z + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: overlap_x, overlap_y, overlap_z double precision :: alpha, beta double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: lower_exp_val, dx - if (is_periodic) then - do j=1,ao_num - do i= 1,ao_num - ao_overlap_abs(i,j)= cdabs(ao_overlap_complex(i,j)) + + if(is_periodic) then + + do j = 1, ao_num + do i = 1, ao_num + ao_overlap_abs(i,j) = cdabs(ao_overlap_complex(i,j)) enddo enddo + else + dim1=100 lower_exp_val = 40.d0 - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B, & - !$OMP overlap_x,overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,dx) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& - !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B, & + !$OMP overlap_x,overlap_y, overlap_z, & + !$OMP alpha, beta,i,j,dx) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_overlap_abs,ao_num,ao_coef_normalized_ordered_transp,ao_nucl,& + !$OMP ao_expo_ordered_transp,dim1,lower_exp_val) do j=1,ao_num A_center(1) = nucl_coord( ao_nucl(j), 1 ) A_center(2) = nucl_coord( ao_nucl(j), 2 ) @@ -160,10 +190,14 @@ BEGIN_PROVIDER [ double precision, ao_overlap_abs,(ao_num,ao_num) ] enddo enddo enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + endif + END_PROVIDER +! 
--- + BEGIN_PROVIDER [ double precision, S_inv,(ao_num,ao_num) ] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/kin_ao_ints.irp.f b/src/ao_one_e_ints/kin_ao_ints.irp.f index 4f117deb..a5ee0670 100644 --- a/src/ao_one_e_ints/kin_ao_ints.irp.f +++ b/src/ao_one_e_ints/kin_ao_ints.irp.f @@ -1,7 +1,10 @@ - BEGIN_PROVIDER [ double precision, ao_deriv2_x,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_y,(ao_num,ao_num) ] -&BEGIN_PROVIDER [ double precision, ao_deriv2_z,(ao_num,ao_num) ] - implicit none + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_z, (ao_num, ao_num) ] + BEGIN_DOC ! Second derivative matrix elements in the |AO| basis. ! @@ -11,114 +14,131 @@ ! \langle \chi_i(x,y,z) | \frac{\partial^2}{\partial x^2} |\chi_j (x,y,z) \rangle ! END_DOC - integer :: i,j,n,l - double precision :: f - integer :: dim1 + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) double precision :: overlap, overlap_y, overlap_z double precision :: overlap_x0, overlap_y0, overlap_z0 double precision :: alpha, beta, c double precision :: A_center(3), B_center(3) - integer :: power_A(3), power_B(3) double precision :: d_a_2,d_2 - dim1=100 - ! -- Dummy call to provide everything - A_center(:) = 0.d0 - B_center(:) = 1.d0 - alpha = 1.d0 - beta = .1d0 - power_A = 1 - power_B = 0 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - ! 
-- + if(use_cosgtos) then + !print*, 'use_cosgtos for ao_kinetic_integrals ?', use_cosgtos - !$OMP PARALLEL DO SCHEDULE(GUIDED) & - !$OMP DEFAULT(NONE) & - !$OMP PRIVATE(A_center,B_center,power_A,power_B,& - !$OMP overlap_y, overlap_z, overlap, & - !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & - !$OMP overlap_x0,overlap_y0,overlap_z0) & - !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & - !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & - !$OMP ao_expo_ordered_transp,dim1) - do j=1,ao_num - A_center(1) = nucl_coord( ao_nucl(j), 1 ) - A_center(2) = nucl_coord( ao_nucl(j), 2 ) - A_center(3) = nucl_coord( ao_nucl(j), 3 ) - power_A(1) = ao_power( j, 1 ) - power_A(2) = ao_power( j, 2 ) - power_A(3) = ao_power( j, 3 ) - do i= 1,ao_num - ao_deriv2_x(i,j)= 0.d0 - ao_deriv2_y(i,j)= 0.d0 - ao_deriv2_z(i,j)= 0.d0 - B_center(1) = nucl_coord( ao_nucl(i), 1 ) - B_center(2) = nucl_coord( ao_nucl(i), 2 ) - B_center(3) = nucl_coord( ao_nucl(i), 3 ) - power_B(1) = ao_power( i, 1 ) - power_B(2) = ao_power( i, 2 ) - power_B(3) = ao_power( i, 3 ) - do n = 1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(n,j) - do l = 1, ao_prim_num(i) - beta = ao_expo_ordered_transp(l,i) - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) - c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) + do j = 1, ao_num + do i = 1, ao_num + ao_deriv2_x(i,j) = ao_deriv2_cosgtos_x(i,j) + ao_deriv2_y(i,j) = ao_deriv2_cosgtos_y(i,j) + ao_deriv2_z(i,j) = ao_deriv2_cosgtos_z(i,j) + enddo + enddo - power_A(1) = power_A(1)-2 - if (power_A(1)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(1) = power_A(1)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) - power_A(1) = power_A(1)-2 + else - double 
precision :: deriv_tmp - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & - +power_A(1) * (power_A(1)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + dim1=100 - ao_deriv2_x(i,j) += c*deriv_tmp - power_A(2) = power_A(2)-2 - if (power_A(2)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(2) = power_A(2)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) - power_A(2) = power_A(2)-2 + ! -- Dummy call to provide everything + A_center(:) = 0.d0 + B_center(:) = 1.d0 + alpha = 1.d0 + beta = .1d0 + power_A = 1 + power_B = 0 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + ! -- - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * overlap_y0 & - +power_A(2) * (power_A(2)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 - ao_deriv2_y(i,j) += c*deriv_tmp + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE(A_center,B_center,power_A,power_B,& + !$OMP overlap_y, overlap_z, overlap, & + !$OMP alpha, beta,i,j,c,d_a_2,d_2,deriv_tmp, & + !$OMP overlap_x0,overlap_y0,overlap_z0) & + !$OMP SHARED(nucl_coord,ao_power,ao_prim_num, & + !$OMP ao_deriv2_x,ao_deriv2_y,ao_deriv2_z,ao_num,ao_coef_normalized_ordered_transp,ao_nucl, & + !$OMP ao_expo_ordered_transp,dim1) + do j=1,ao_num + A_center(1) = nucl_coord( ao_nucl(j), 1 ) + A_center(2) = nucl_coord( ao_nucl(j), 2 ) + A_center(3) = nucl_coord( ao_nucl(j), 3 ) + power_A(1) = ao_power( j, 1 ) + power_A(2) = ao_power( j, 2 ) + power_A(3) = ao_power( j, 3 ) + do i= 1,ao_num + ao_deriv2_x(i,j)= 0.d0 + ao_deriv2_y(i,j)= 0.d0 + ao_deriv2_z(i,j)= 0.d0 + B_center(1) = nucl_coord( ao_nucl(i), 1 ) + B_center(2) = nucl_coord( ao_nucl(i), 2 ) + B_center(3) = nucl_coord( ao_nucl(i), 3 ) + power_B(1) = ao_power( i, 1 ) + 
power_B(2) = ao_power( i, 2 ) + power_B(3) = ao_power( i, 3 ) + do n = 1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(n,j) + do l = 1, ao_prim_num(i) + beta = ao_expo_ordered_transp(l,i) + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x0,overlap_y0,overlap_z0,overlap,dim1) + c = ao_coef_normalized_ordered_transp(n,j) * ao_coef_normalized_ordered_transp(l,i) - power_A(3) = power_A(3)-2 - if (power_A(3)>-1) then - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) - else - d_a_2 = 0.d0 - endif - power_A(3) = power_A(3)+4 - call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) - power_A(3) = power_A(3)-2 + power_A(1) = power_A(1)-2 + if (power_A(1)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_a_2,overlap_y,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(1) = power_A(1)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,d_2,overlap_y,overlap_z,overlap,dim1) + power_A(1) = power_A(1)-2 - deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & - +power_A(3) * (power_A(3)-1.d0) * d_a_2 & - +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 - ao_deriv2_z(i,j) += c*deriv_tmp + double precision :: deriv_tmp + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(1) +1.d0) * overlap_x0 & + +power_A(1) * (power_A(1)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_y0*overlap_z0 + ao_deriv2_x(i,j) += c*deriv_tmp + power_A(2) = power_A(2)-2 + if (power_A(2)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_a_2,overlap_z,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(2) = power_A(2)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,d_2,overlap_z,overlap,dim1) + power_A(2) = power_A(2)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(2) +1.d0 ) * 
overlap_y0 & + +power_A(2) * (power_A(2)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_z0 + ao_deriv2_y(i,j) += c*deriv_tmp + + power_A(3) = power_A(3)-2 + if (power_A(3)>-1) then + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_a_2,overlap,dim1) + else + d_a_2 = 0.d0 + endif + power_A(3) = power_A(3)+4 + call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_y,overlap_z,d_2,overlap,dim1) + power_A(3) = power_A(3)-2 + + deriv_tmp = (-2.d0 * alpha * (2.d0 * power_A(3) +1.d0 ) * overlap_z0 & + +power_A(3) * (power_A(3)-1.d0) * d_a_2 & + +4.d0 * alpha * alpha * d_2 )*overlap_x0*overlap_y0 + ao_deriv2_z(i,j) += c*deriv_tmp + + enddo + enddo enddo enddo - enddo - enddo - !$OMP END PARALLEL DO + !$OMP END PARALLEL DO + + endif END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_kinetic_integrals, (ao_num,ao_num)] implicit none BEGIN_DOC diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index c4a573be..dddf98d4 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -1,3 +1,6 @@ + +! --- + subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) implicit none BEGIN_DOC @@ -15,36 +18,104 @@ subroutine give_all_erf_kl_ao(integrals_ao,mu_in,C_center) enddo end +! --- + +double precision function NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) -double precision function NAI_pol_mult_erf_ao(i_ao,j_ao,mu_in,C_center) - implicit none BEGIN_DOC + ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! 
END_DOC - integer, intent(in) :: i_ao,j_ao + + implicit none + integer, intent(in) :: i_ao, j_ao double precision, intent(in) :: mu_in, C_center(3) - integer :: i,j,num_A,num_B, power_A(3), power_B(3), n_pt_in - double precision :: A_center(3), B_center(3),integral, alpha,beta + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in + double precision :: A_center(3), B_center(3), integral, alpha, beta + double precision :: NAI_pol_mult_erf - num_A = ao_nucl(i_ao) - power_A(1:3)= ao_power(i_ao,1:3) + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(j_ao) - power_B(1:3)= ao_power(j_ao,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) B_center(1:3) = nucl_coord(num_B,1:3) + n_pt_in = n_pt_max_integrals + NAI_pol_mult_erf_ao = 0.d0 do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do j = 1, ao_prim_num(j_ao) beta = ao_expo_ordered_transp(j,j_ao) - integral = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in) - NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao) + + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in,mu_in) + + NAI_pol_mult_erf_ao += integral * ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) enddo enddo -end +end function NAI_pol_mult_erf_ao + +! --- + +double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! 
+ END_DOC + + implicit none + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3) + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, power_A1(3), power_A2(3), n_pt_in + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + + double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + return + endif + + power_A1(1:3) = ao_power(i_ao,1:3) + power_A2(1:3) = ao_power(j_ao,1:3) + + A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + NAI_pol_mult_erf_ao_with1s = 0.d0 + do i = 1, ao_prim_num(i_ao) + alpha1 = ao_expo_ordered_transp (i,i_ao) + coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alpha2 = ao_expo_ordered_transp(j,j_ao) + coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) + if(dabs(coef12) .lt. 1d-14) cycle + + integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + NAI_pol_mult_erf_ao_with1s += integral * coef12 + enddo + enddo + +end function NAI_pol_mult_erf_ao_with1s + +! --- double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) @@ -127,58 +198,221 @@ end function NAI_pol_mult_erf ! --- - -double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center) +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! ! Computes the following integral : - ! $\int_{-\infty}^{infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + ! .. math:: + ! + ! 
\int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. ! END_DOC + include 'utils/constants.include.F' + implicit none - integer, intent(in) :: i_ao, j_ao - double precision, intent(in) :: beta, B_center(3) - double precision, intent(in) :: mu_in, C_center(3) - integer :: i, j, power_A1(3), power_A2(3), n_pt_in - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision, external :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - ASSERT(beta .ge. 0.d0) - if(beta .lt. 
1d-10) then - NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center) + double precision :: rint + + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + enddo + + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new + + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + res_v(ipoint) = coeff * rint(0, const) + enddo + + else + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff + enddo + + endif + +end subroutine NAI_pol_mult_erf_v + +! --- + +double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & + , beta, B_center, C_center, n_pt_in, mu_in ) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! .. math:: + ! + ! 
\int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) + ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) + ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) + ! \exp(-\beta (r - B)^2) + ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + integer, intent(in) :: n_pt_in + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + + integer :: i, n_pt, n_pt_out + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new + + double precision :: rint + + + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} + alpha12 = alpha1 + alpha2 + alpha12_inv = 1.d0 / alpha12 + alpha12_inv_2 = 0.5d0 * alpha12_inv + rho12 = alpha1 * alpha2 * alpha12_inv + A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv + A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv + A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv + dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + const_factor12 = dist12 * rho12 + if(const_factor12 > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 return endif - power_A1(1:3) = ao_power(i_ao,1:3) - power_A2(1:3) = ao_power(j_ao,1:3) + ! 
--- - A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) - A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv + dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & + + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & + + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - n_pt_in = n_pt_max_integrals + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif - NAI_pol_mult_erf_ao_with1s = 0.d0 - do i = 1, ao_prim_num(i_ao) - alpha1 = ao_expo_ordered_transp (i,i_ao) - coef1 = ao_coef_normalized_ordered_transp(i,i_ao) + dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & + + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & + + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - do j = 1, ao_prim_num(j_ao) - alpha2 = ao_expo_ordered_transp(j,j_ao) - coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao) - if(dabs(coef12) .lt. 1d-14) cycle + ! 
--- - integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) + p_new = mu_in / dsqrt(p + mu_in * mu_in) + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - NAI_pol_mult_erf_ao_with1s += integral * coef12 - enddo + n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) + const = p * dist_integral * p_new * p_new + if(n_pt == 0) then + NAI_pol_mult_erf_with1s = coeff * rint(0, const) + return + endif + + do i = 0, n_pt_in + d(i) = 0.d0 enddo + p_new = p_new * p_new + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) -end function NAI_pol_mult_erf_ao_with1s + if(n_pt_out < 0) then + NAI_pol_mult_erf_with1s = 0.d0 + return + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + NAI_pol_mult_erf_with1s = accu * coeff + +end function NAI_pol_mult_erf_with1s + +! --- subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) @@ -428,107 +662,6 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) - ! \frac{\erf(\mu |r - R_C |)}{| r - R_C |}$. - ! 
- END_DOC - - include 'utils/constants.include.F' - - implicit none - - integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(in) :: C_center(LD_C,3) - double precision, intent(out) :: res_v(LD_resv) - - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new, p_new2, coef_tmp - - double precision :: rint - - res_V(1:LD_resv) = 0.d0 - - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - coef_tmp = p * p_new2 - - dist = 0.d0 - do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) - enddo - - const_factor = dist * rho - if(const_factor > 80.d0) then - return - endif - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - - if(n_pt == 0) then - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - res_v(ipoint) = coeff * rint(0, const) - enddo - - else - - do ipoint = 1, n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const = coef_tmp * dist_integral - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) - - 
if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - - res_v(ipoint) = accu * coeff - enddo - - endif - -end subroutine NAI_pol_mult_erf_v - subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) @@ -659,113 +792,3 @@ subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,po end -double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 & - , beta, B_center, C_center, n_pt_in, mu_in ) - - BEGIN_DOC - ! - ! Computes the following integral : - ! - ! .. math:: - ! - ! \int dx (x - A1_x)^a_1 (x - B1_x)^a_2 \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2) - ! \int dy (y - A1_y)^b_1 (y - B1_y)^b_2 \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2) - ! \int dz (x - A1_z)^c_1 (z - B1_z)^c_2 \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2) - ! \exp(-\beta (r - B)^2) - ! \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$. - ! - END_DOC - - include 'utils/constants.include.F' - - implicit none - integer, intent(in) :: n_pt_in - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - - integer :: i, n_pt, n_pt_out - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new - - double precision :: rint - - - ! 
e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} - alpha12 = alpha1 + alpha2 - alpha12_inv = 1.d0 / alpha12 - alpha12_inv_2 = 0.5d0 * alpha12_inv - rho12 = alpha1 * alpha2 * alpha12_inv - A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv - A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv - A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv - dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) & - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) & - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) - - const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! --- - - ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv - dist = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) & - + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) & - + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3)) - - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) & - + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) & - + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3)) - - ! 
--- - - p_new = mu_in / dsqrt(p + mu_in * mu_in) - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - n_pt = 2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then - NAI_pol_mult_erf_with1s = coeff * rint(0, const) - return - endif - - do i = 0, n_pt_in - d(i) = 0.d0 - enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - - if(n_pt_out < 0) then - NAI_pol_mult_erf_with1s = 0.d0 - return - endif - - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) - enddo - NAI_pol_mult_erf_with1s = accu * coeff - -end function NAI_pol_mult_erf_with1s diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f index 1d92dc7d..928053ad 100644 --- a/src/ao_one_e_ints/pot_ao_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_ints.irp.f @@ -1,4 +1,8 @@ + +! --- + BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] + BEGIN_DOC ! Nucleus-electron interaction, in the |AO| basis set. ! @@ -6,84 +10,100 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)] ! ! These integrals also contain the pseudopotential integrals. 
END_DOC + implicit none - double precision :: alpha, beta, gama, delta - integer :: num_A,num_B - double precision :: A_center(3),B_center(3),C_center(3) - integer :: power_A(3),power_B(3) - integer :: i,j,k,l,n_pt_in,m - double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: alpha, beta + double precision :: A_center(3),B_center(3),C_center(3) + double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult + + ao_integrals_n_e = 0.d0 if (read_ao_integrals_n_e) then + call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e) print *, 'AO N-e integrals read from disk' + else - ao_integrals_n_e = 0.d0 + if(use_cosgtos) then + !print *, " use_cosgtos for ao_integrals_n_e ?", use_cosgtos - ! _ - ! /| / |_) - ! | / | \ - ! + do j = 1, ao_num + do i = 1, ao_num + ao_integrals_n_e(i,j) = ao_integrals_n_e_cosgtos(i,j) + enddo + enddo - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& - !$OMP num_A,num_B,Z,c,n_pt_in) & - !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& - !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) + else - n_pt_in = n_pt_max_integrals + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,& + !$OMP num_A,num_B,Z,c,c1,n_pt_in) & + !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,nucl_coord,ao_coef_normalized_ordered_transp,& + !$OMP n_pt_max_integrals,ao_integrals_n_e,nucl_num,nucl_charge) - !$OMP DO SCHEDULE (dynamic) + n_pt_in = n_pt_max_integrals - do j = 1, ao_num - num_A = ao_nucl(j) - power_A(1:3)= ao_power(j,1:3) - A_center(1:3) = nucl_coord(num_A,1:3) + !$OMP DO SCHEDULE (dynamic) - do i = 1, ao_num + do j = 1, ao_num + num_A = ao_nucl(j) + 
power_A(1:3)= ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) - num_B = ao_nucl(i) - power_B(1:3)= ao_power(i,1:3) - B_center(1:3) = nucl_coord(num_B,1:3) + do i = 1, ao_num - do l=1,ao_prim_num(j) - alpha = ao_expo_ordered_transp(l,j) + num_B = ao_nucl(i) + power_B(1:3)= ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) - do m=1,ao_prim_num(i) - beta = ao_expo_ordered_transp(m,i) + do l=1,ao_prim_num(j) + alpha = ao_expo_ordered_transp(l,j) - double precision :: c - c = 0.d0 + do m=1,ao_prim_num(i) + beta = ao_expo_ordered_transp(m,i) - do k = 1, nucl_num - double precision :: Z - Z = nucl_charge(k) + double precision :: c, c1 + c = 0.d0 - C_center(1:3) = nucl_coord(k,1:3) + do k = 1, nucl_num + double precision :: Z + Z = nucl_charge(k) - c = c - Z * NAI_pol_mult(A_center,B_center, & - power_A,power_B,alpha,beta,C_center,n_pt_in) + C_center(1:3) = nucl_coord(k,1:3) + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, alpha, beta + + c1 = NAI_pol_mult( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !print *, ' c1 = ', c1 + + c = c - Z * c1 + + enddo + ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & + + ao_coef_normalized_ordered_transp(l,j) & + * ao_coef_normalized_ordered_transp(m,i) * c enddo - ao_integrals_n_e(i,j) = ao_integrals_n_e(i,j) & - + ao_coef_normalized_ordered_transp(l,j) & - * ao_coef_normalized_ordered_transp(m,i) * c enddo enddo enddo - enddo !$OMP END DO !$OMP END PARALLEL - IF (DO_PSEUDO) THEN + + endif + + + IF(do_pseudo) THEN ao_integrals_n_e += ao_pseudo_integrals ENDIF - IF(point_charges) THEN - ao_integrals_n_e += ao_integrals_pt_chrg - ENDIF - endif @@ -102,7 +122,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_imag, (ao_num,ao_num)] ! 
:math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -125,7 +145,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e_per_atom, (ao_num,ao_num,nuc ! :math:`\langle \chi_i | -\frac{1}{|r-R_A|} | \chi_j \rangle` END_DOC implicit none - double precision :: alpha, beta, gama, delta + double precision :: alpha, beta integer :: i_c,num_A,num_B double precision :: A_center(3),B_center(3),C_center(3) integer :: power_A(3),power_B(3) @@ -268,6 +288,7 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b end +! --- subroutine give_polynomial_mult_center_one_e(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out) implicit none @@ -579,61 +600,3 @@ double precision function V_r(n,alpha) end -double precision function V_phi(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\phi$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. - END_DOC - integer :: n,m, i - double precision :: prod, Wallis - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_phi = 4.d0 * prod * Wallis(m) -end - - -double precision function V_theta(n,m) - implicit none - BEGIN_DOC - ! Computes the angular $\theta$ part of the nuclear attraction integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ - END_DOC - integer :: n,m,i - double precision :: Wallis, prod - include 'utils/constants.include.F' - V_theta = 0.d0 - prod = 1.d0 - do i = 0,shiftr(n,1)-1 - prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) - enddo - V_theta = (prod+prod) * Wallis(m) -end - - -double precision function Wallis(n) - implicit none - BEGIN_DOC - ! Wallis integral: - ! - ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. 
- END_DOC - double precision :: fact - integer :: n,p - include 'utils/constants.include.F' - if(iand(n,1).eq.0)then - Wallis = fact(shiftr(n,1)) - Wallis = pi * fact(n) / (dble(ibset(0_8,n)) * (Wallis+Wallis)*Wallis) - else - p = shiftr(n,1) - Wallis = fact(p) - Wallis = dble(ibset(0_8,p+p)) * Wallis*Wallis / fact(p+p+1) - endif - -end - - diff --git a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f index 7a567979..963a49a6 100644 --- a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f +++ b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f @@ -53,13 +53,13 @@ subroutine compute_ao_tc_sym_two_e_pot_jl(j, l, n_integrals, buffer_i, buffer_va integral_erf = ao_two_e_integral_erf(i, k, j, l) integral = integral_erf + integral_pot - if( j1b_type .eq. 1 ) then - !print *, ' j1b type 1 is added' - integral = integral + j1b_gauss_2e_j1(i, k, j, l) - elseif( j1b_type .eq. 2 ) then - !print *, ' j1b type 2 is added' - integral = integral + j1b_gauss_2e_j2(i, k, j, l) - endif + !if( j1b_type .eq. 1 ) then + ! !print *, ' j1b type 1 is added' + ! integral = integral + j1b_gauss_2e_j1(i, k, j, l) + !elseif( j1b_type .eq. 2 ) then + ! !print *, ' j1b type 2 is added' + ! integral = integral + j1b_gauss_2e_j2(i, k, j, l) + !endif if(abs(integral) < thr) then cycle diff --git a/src/ao_two_e_ints/two_e_integrals.irp.f b/src/ao_two_e_ints/two_e_integrals.irp.f index 4c3c6190..835dc89a 100644 --- a/src/ao_two_e_ints/two_e_integrals.irp.f +++ b/src/ao_two_e_ints/two_e_integrals.irp.f @@ -1,102 +1,123 @@ -double precision function ao_two_e_integral(i,j,k,l) - implicit none + +! --- + +double precision function ao_two_e_integral(i, j, k, l) + BEGIN_DOC ! integral of the AO basis or (ij|kl) ! 
i(r1) j(r1) 1/r12 k(r2) l(r2) END_DOC - integer,intent(in) :: i,j,k,l - integer :: p,q,r,s - double precision :: I_center(3),J_center(3),K_center(3),L_center(3) - integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) - double precision :: integral + implicit none include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3) + integer :: iorder_p(3), iorder_q(3) + double precision :: I_center(3), J_center(3), K_center(3), L_center(3) + double precision :: integral double precision :: P_new(0:max_dim,3),P_center(3),fact_p,pp double precision :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq - integer :: iorder_p(3), iorder_q(3) + double precision :: ao_two_e_integral_schwartz_accel - if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then - ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) - else + double precision :: ao_two_e_integral_cosgtos - dim1 = n_pt_max_integrals - num_i = ao_nucl(i) - num_j = ao_nucl(j) - num_k = ao_nucl(k) - num_l = ao_nucl(l) - ao_two_e_integral = 0.d0 + if(use_cosgtos) then + !print *, ' use_cosgtos for ao_two_e_integral ?', use_cosgtos - if (num_i /= num_j .or. num_k /= num_l .or. 
num_j /= num_k)then - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - I_center(p) = nucl_coord(num_i,p) - J_center(p) = nucl_coord(num_j,p) - K_center(p) = nucl_coord(num_k,p) - L_center(p) = nucl_coord(num_l,p) - enddo + ao_two_e_integral = ao_two_e_integral_cosgtos(i, j, k, l) - double precision :: coef1, coef2, coef3, coef4 - double precision :: p_inv,q_inv - double precision :: general_primitive_integral + else - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & - I_power,J_power,I_center,J_center,dim1) - p_inv = 1.d0/pp - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& - ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & - K_power,L_power,K_center,L_center,dim1) - q_inv = 1.d0/qq - integral = general_primitive_integral(dim1, & - P_new,P_center,fact_p,pp,p_inv,iorder_p, & - Q_new,Q_center,fact_q,qq,q_inv,iorder_q) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! 
p + if (ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then + + ao_two_e_integral = ao_two_e_integral_schwartz_accel(i,j,k,l) else - do p = 1, 3 - I_power(p) = ao_power(i,p) - J_power(p) = ao_power(j,p) - K_power(p) = ao_power(k,p) - L_power(p) = ao_power(l,p) - enddo - double precision :: ERI + dim1 = n_pt_max_integrals - do p = 1, ao_prim_num(i) - coef1 = ao_coef_normalized_ordered_transp(p,i) - do q = 1, ao_prim_num(j) - coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) - do r = 1, ao_prim_num(k) - coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) - do s = 1, ao_prim_num(l) - coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) - integral = ERI( & - ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& - I_power(1),J_power(1),K_power(1),L_power(1), & - I_power(2),J_power(2),K_power(2),L_power(2), & - I_power(3),J_power(3),K_power(3),L_power(3)) - ao_two_e_integral = ao_two_e_integral + coef4 * integral - enddo ! s - enddo ! r - enddo ! q - enddo ! p + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + ao_two_e_integral = 0.d0 + + if (num_i /= num_j .or. num_k /= num_l .or. 
num_j /= num_k)then + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) + J_center(p) = nucl_coord(num_j,p) + K_center(p) = nucl_coord(num_k,p) + L_center(p) = nucl_coord(num_l,p) + enddo + + double precision :: coef1, coef2, coef3, coef4 + double precision :: p_inv,q_inv + double precision :: general_primitive_integral + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,& + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j), & + I_power,J_power,I_center,J_center,dim1) + p_inv = 1.d0/pp + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,& + ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l), & + K_power,L_power,K_center,L_center,dim1) + q_inv = 1.d0/qq + integral = general_primitive_integral(dim1, & + P_new,P_center,fact_p,pp,p_inv,iorder_p, & + Q_new,Q_center,fact_q,qq,q_inv,iorder_q) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! 
p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + double precision :: ERI + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_normalized_ordered_transp(p,i) + do q = 1, ao_prim_num(j) + coef2 = coef1*ao_coef_normalized_ordered_transp(q,j) + do r = 1, ao_prim_num(k) + coef3 = coef2*ao_coef_normalized_ordered_transp(r,k) + do s = 1, ao_prim_num(l) + coef4 = coef3*ao_coef_normalized_ordered_transp(s,l) + integral = ERI( & + ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),& + I_power(1),J_power(1),K_power(1),L_power(1), & + I_power(2),J_power(2),K_power(2),L_power(2), & + I_power(3),J_power(3),K_power(3),L_power(3)) + ao_two_e_integral = ao_two_e_integral + coef4 * integral + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif endif @@ -104,6 +125,8 @@ double precision function ao_two_e_integral(i,j,k,l) end +! --- + double precision function ao_two_e_integral_schwartz_accel(i,j,k,l) implicit none BEGIN_DOC @@ -421,14 +444,17 @@ BEGIN_PROVIDER [ logical, ao_two_e_integrals_in_map ] END_PROVIDER -BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] - implicit none +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz, (ao_num, ao_num) ] + BEGIN_DOC ! Needed to compute Schwartz inequalities END_DOC - integer :: i,k - double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 + implicit none + integer :: i, k + double precision :: ao_two_e_integral,cpu_1,cpu_2, wall_1, wall_2 ao_two_e_integral_schwartz(1,1) = ao_two_e_integral(1,1,1,1) !$OMP PARALLEL DO PRIVATE(i,k) & @@ -445,6 +471,7 @@ BEGIN_PROVIDER [ double precision, ao_two_e_integral_schwartz,(ao_num,ao_num) ] END_PROVIDER +! 
--- double precision function general_primitive_integral(dim, & P_new,P_center,fact_p,p,p_inv,iorder_p, & diff --git a/src/becke_numerical_grid/EZFIO.cfg b/src/becke_numerical_grid/EZFIO.cfg index 4083e0e7..7861f074 100644 --- a/src/becke_numerical_grid/EZFIO.cfg +++ b/src/becke_numerical_grid/EZFIO.cfg @@ -64,3 +64,15 @@ doc: Number of angular extra_grid points given from input. Warning, this number interface: ezfio,provider,ocaml default: 1202 +[rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + +[extra_rad_grid_type] +type: character*(32) +doc: method used to sample the radial space. Possible choices are [KNOWLES | GILL] +interface: ezfio,provider,ocaml +default: KNOWLES + diff --git a/src/becke_numerical_grid/extra_grid.irp.f b/src/becke_numerical_grid/extra_grid.irp.f index db691e55..9bd24f22 100644 --- a/src/becke_numerical_grid/extra_grid.irp.f +++ b/src/becke_numerical_grid/extra_grid.irp.f @@ -1,96 +1,149 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_radial_grid] &BEGIN_PROVIDER [integer, n_points_extra_integration_angular] - implicit none - BEGIN_DOC - ! n_points_extra_radial_grid = number of radial grid points_extra per atom - ! - ! n_points_extra_integration_angular = number of angular grid points_extra per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_extra_grid_becke)then - select case (extra_grid_type_sgn) - case(0) - n_points_extra_radial_grid = 23 - n_points_extra_integration_angular = 170 - case(1) - n_points_extra_radial_grid = 50 - n_points_extra_integration_angular = 194 - case(2) - n_points_extra_radial_grid = 75 - n_points_extra_integration_angular = 302 - case(3) - n_points_extra_radial_grid = 99 - n_points_extra_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' 
- stop - end select -else - n_points_extra_radial_grid = my_n_pt_r_extra_grid - n_points_extra_integration_angular = my_n_pt_a_extra_grid -endif + + BEGIN_DOC + ! n_points_extra_radial_grid = number of radial grid points_extra per atom + ! + ! n_points_extra_integration_angular = number of angular grid points_extra per atom + ! + ! These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_extra_grid_becke)then + select case (extra_grid_type_sgn) + case(0) + n_points_extra_radial_grid = 23 + n_points_extra_integration_angular = 170 + case(1) + n_points_extra_radial_grid = 50 + n_points_extra_integration_angular = 194 + case(2) + n_points_extra_radial_grid = 75 + n_points_extra_integration_angular = 302 + case(3) + n_points_extra_radial_grid = 99 + n_points_extra_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_extra_radial_grid = my_n_pt_r_extra_grid + n_points_extra_integration_angular = my_n_pt_a_extra_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_extra_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points_extra per atom END_DOC + + implicit none n_points_extra_grid_per_atom = n_points_extra_integration_angular * n_points_extra_radial_grid END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_extra_radial, (n_points_extra_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_extra_integral] - implicit none BEGIN_DOC ! points_extra in [0,1] to map the radial integral [0,\infty] END_DOC + + implicit none + integer :: i + dr_radial_extra_integral = 1.d0/dble(n_points_extra_radial_grid-1) - integer :: i do i = 1, n_points_extra_radial_grid grid_points_extra_radial(i) = dble(i-1) * dr_radial_extra_integral enddo END_PROVIDER +! 
--- + BEGIN_PROVIDER [double precision, grid_points_extra_per_atom, (3,n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num)] + BEGIN_DOC ! x,y,z coordinates of grid points_extra used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_extra_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_extra_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_extra_per_atom = 0.d0 - ! explicit values of the grid points_extra centered around each atom - do k = 1, n_points_extra_integration_angular - grid_points_extra_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points_extra(k,1) * r - grid_points_extra_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points_extra(k,2) * r - grid_points_extra_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points_extra(k,3) * r + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_extra_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! 
explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_extra_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_extra_radial_grid-j+1)**2 + + ! explicit values of the grid points_extra centered around each atom + do k = 1, n_points_extra_integration_angular + grid_points_extra_per_atom(1,k,j,i) = x_ref + angular_quadrature_points_extra(k,1) * r + grid_points_extra_per_atom(2,k,j,i) = y_ref + angular_quadrature_points_extra(k,2) * r + grid_points_extra_per_atom(3,k,j,i) = z_ref + angular_quadrature_points_extra(k,3) * r + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + + END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Weight function at grid points_extra : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -99,11 +152,14 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration ! represented by the last dimension and the points_extra are labelled by the ! other dimensions. 
END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points_extra in space ! that are referred to each atom do j = 1, nucl_num @@ -114,6 +170,7 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration r(1) = grid_points_extra_per_atom(1,l,k,j) r(2) = grid_points_extra_per_atom(2,l,k,j) r(3) = grid_points_extra_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points_extra in space, ou need to evaluate the P_n(r) do i = 1, nucl_num @@ -124,18 +181,19 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration enddo accu = 1.d0/accu weight_at_r_extra(l,k,j) = tmp_array(j) * accu + if(isnan(weight_at_r_extra(l,k,j)))then - print*,'isnan(weight_at_r_extra(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points_extra - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + print*,'isnan(weight_at_r_extra(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points_extra + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -144,35 +202,73 @@ BEGIN_PROVIDER [double precision, weight_at_r_extra, (n_points_extra_integration END_PROVIDER +! 
--- + BEGIN_PROVIDER [double precision, final_weight_at_r_extra, (n_points_extra_integration_angular,n_points_extra_radial_grid,nucl_num) ] + BEGIN_DOC ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points_extra in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral - if(isnan(final_weight_at_r_extra(k,i,j)))then - print*,'isnan(final_weight_at_r_extra(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + PROVIDE extra_rad_grid_type + if(extra_rad_grid_type .eq. "KNOWLES") then + + ! run over all points_extra in space + do j = 1, nucl_num ! 
that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_extra_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + do k = 1, n_points_extra_integration_angular ! for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& + * knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration * dr_radial_extra_integral + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))')weights_angular_points_extra(k) , weight_at_r_extra(k,i,j) , contrib_integration , dr_radial_extra_integral + stop + endif + enddo enddo enddo - enddo + + elseif(extra_rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + PROVIDE R_gill + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_extra_radial_grid) + + ! run over all points_extra in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_extra_radial_grid -1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_extra_radial_grid-i+1)**7 + + do k = 1, n_points_extra_integration_angular ! 
for each angular point attached to the "jth" atom + final_weight_at_r_extra(k,i,j) = weights_angular_points_extra(k) * weight_at_r_extra(k,i,j) * contrib_integration + if(isnan(final_weight_at_r_extra(k,i,j)))then + print*,'isnan(final_weight_at_r_extra(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points_extra(k), weight_at_r_extra(k,i,j), contrib_integration + stop + endif + enddo + enddo + enddo + + else + + print*, " extra_rad_grid_type = ", extra_rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER diff --git a/src/becke_numerical_grid/extra_grid_vector.irp.f b/src/becke_numerical_grid/extra_grid_vector.irp.f index 3a5e6d3c..e4fc03b5 100644 --- a/src/becke_numerical_grid/extra_grid_vector.irp.f +++ b/src/becke_numerical_grid/extra_grid_vector.irp.f @@ -1,26 +1,35 @@ +! --- + BEGIN_PROVIDER [integer, n_points_extra_final_grid] - implicit none + BEGIN_DOC ! Number of points_extra which are non zero END_DOC - integer :: i,j,k,l + + implicit none + integer :: i, j, k, l + n_points_extra_final_grid = 0 + do j = 1, nucl_num do i = 1, n_points_extra_radial_grid -1 do k = 1, n_points_extra_integration_angular - if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid)then + if(dabs(final_weight_at_r_extra(k,i,j)) < thresh_extra_grid) then cycle endif n_points_extra_final_grid += 1 enddo enddo enddo + print*,'n_points_extra_final_grid = ',n_points_extra_final_grid print*,'n max point = ',n_points_extra_integration_angular*(n_points_extra_radial_grid*nucl_num - 1) ! call ezfio_set_becke_numerical_grid_n_points_extra_final_grid(n_points_extra_final_grid) END_PROVIDER +! 
--- + BEGIN_PROVIDER [double precision, final_grid_points_extra, (3,n_points_extra_final_grid)] &BEGIN_PROVIDER [double precision, final_weight_at_r_vector_extra, (n_points_extra_final_grid) ] &BEGIN_PROVIDER [integer, index_final_points_extra, (3,n_points_extra_final_grid) ] diff --git a/src/becke_numerical_grid/grid_becke.irp.f b/src/becke_numerical_grid/grid_becke.irp.f index 79f15c9a..21b9f98d 100644 --- a/src/becke_numerical_grid/grid_becke.irp.f +++ b/src/becke_numerical_grid/grid_becke.irp.f @@ -1,103 +1,174 @@ + +! --- + BEGIN_PROVIDER [integer, n_points_radial_grid] &BEGIN_PROVIDER [integer, n_points_integration_angular] - implicit none - BEGIN_DOC - ! n_points_radial_grid = number of radial grid points per atom - ! - ! n_points_integration_angular = number of angular grid points per atom - ! - ! These numbers are automatically set by setting the grid_type_sgn parameter - END_DOC -if(.not.my_grid_becke)then - select case (grid_type_sgn) - case(0) - n_points_radial_grid = 23 - n_points_integration_angular = 170 - case(1) - n_points_radial_grid = 50 - n_points_integration_angular = 194 - case(2) - n_points_radial_grid = 75 - n_points_integration_angular = 302 - case(3) - n_points_radial_grid = 99 - n_points_integration_angular = 590 - case default - write(*,*) '!!! Quadrature grid not available !!!' - stop - end select -else - n_points_radial_grid = my_n_pt_r_grid - n_points_integration_angular = my_n_pt_a_grid -endif + + BEGIN_DOC + ! n_points_radial_grid = number of radial grid points per atom + ! + ! n_points_integration_angular = number of angular grid points per atom + ! + ! 
These numbers are automatically set by setting the grid_type_sgn parameter + END_DOC + + implicit none + + if(.not.my_grid_becke)then + select case (grid_type_sgn) + case(0) + n_points_radial_grid = 23 + n_points_integration_angular = 170 + case(1) + n_points_radial_grid = 50 + n_points_integration_angular = 194 + case(2) + n_points_radial_grid = 75 + n_points_integration_angular = 302 + case(3) + n_points_radial_grid = 99 + n_points_integration_angular = 590 + case default + write(*,*) '!!! Quadrature grid not available !!!' + stop + end select + else + n_points_radial_grid = my_n_pt_r_grid + n_points_integration_angular = my_n_pt_a_grid + endif + END_PROVIDER +! --- + BEGIN_PROVIDER [integer, n_points_grid_per_atom] - implicit none + BEGIN_DOC ! Number of grid points per atom END_DOC + + implicit none + n_points_grid_per_atom = n_points_integration_angular * n_points_radial_grid END_PROVIDER -BEGIN_PROVIDER [integer , m_knowles] - implicit none +! --- + +BEGIN_PROVIDER [integer, m_knowles] + BEGIN_DOC ! value of the "m" parameter in the equation (7) of the paper of Knowles (JCP, 104, 1996) END_DOC + + implicit none + m_knowles = 3 + END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, R_gill] + + implicit none + + R_gill = 3.d0 + +END_PROVIDER + +! --- + BEGIN_PROVIDER [double precision, grid_points_radial, (n_points_radial_grid)] &BEGIN_PROVIDER [double precision, dr_radial_integral] - implicit none BEGIN_DOC ! points in [0,1] to map the radial integral [0,\infty] END_DOC - dr_radial_integral = 1.d0/dble(n_points_radial_grid-1) - integer :: i + + implicit none + integer :: i + + dr_radial_integral = 1.d0 / dble(n_points_radial_grid-1) + do i = 1, n_points_radial_grid grid_points_radial(i) = dble(i-1) * dr_radial_integral enddo END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, grid_points_per_atom, (3,n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! 
x,y,z coordinates of grid points used for integration in 3d space END_DOC + implicit none - integer :: i,j,k - double precision :: dr,x_ref,y_ref,z_ref - double precision :: knowles_function - do i = 1, nucl_num - x_ref = nucl_coord(i,1) - y_ref = nucl_coord(i,2) - z_ref = nucl_coord(i,3) - do j = 1, n_points_radial_grid-1 - double precision :: x,r - ! x value for the mapping of the [0, +\infty] to [0,1] - x = grid_points_radial(j) + integer :: i, j, k + double precision :: dr, x_ref, y_ref, z_ref + double precision :: x, r, tmp + double precision, external :: knowles_function - ! value of the radial coordinate for the integration - r = knowles_function(alpha_knowles(grid_atomic_number(i)),m_knowles,x) + grid_points_per_atom = 0.d0 - ! explicit values of the grid points centered around each atom - do k = 1, n_points_integration_angular - grid_points_per_atom(1,k,j,i) = & - x_ref + angular_quadrature_points(k,1) * r - grid_points_per_atom(2,k,j,i) = & - y_ref + angular_quadrature_points(k,2) * r - grid_points_per_atom(3,k,j,i) = & - z_ref + angular_quadrature_points(k,3) * r + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + ! x value for the mapping of the [0, +\infty] to [0,1] + x = grid_points_radial(j) + ! value of the radial coordinate for the integration + r = knowles_function(alpha_knowles(grid_atomic_number(i)), m_knowles, x) + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! 
GILL & CHIEN, 2002 + + do i = 1, nucl_num + x_ref = nucl_coord(i,1) + y_ref = nucl_coord(i,2) + z_ref = nucl_coord(i,3) + do j = 1, n_points_radial_grid-1 + + r = R_gill * dble(j-1)**2 / dble(n_points_radial_grid-j+1)**2 + + ! explicit values of the grid points centered around each atom + do k = 1, n_points_integration_angular + grid_points_per_atom(1,k,j,i) = x_ref + angular_quadrature_points(k,1) * r + grid_points_per_atom(2,k,j,i) = y_ref + angular_quadrature_points(k,2) * r + grid_points_per_atom(3,k,j,i) = z_ref + angular_quadrature_points(k,3) * r + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif + END_PROVIDER -BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] +! --- + +BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC ! Weight function at grid points : w_n(r) according to the equation (22) ! of Becke original paper (JCP, 88, 1988) @@ -106,11 +177,13 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p ! represented by the last dimension and the points are labelled by the ! other dimensions. END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) + integer :: i, j, k, l, m + double precision :: r(3), accu + double precision :: tmp_array(nucl_num) + double precision, external :: cell_function_becke + ! run over all points in space ! that are referred to each atom do j = 1, nucl_num @@ -121,28 +194,30 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p r(1) = grid_points_per_atom(1,l,k,j) r(2) = grid_points_per_atom(2,l,k,j) r(3) = grid_points_per_atom(3,l,k,j) + accu = 0.d0 ! For each of these points in space, ou need to evaluate the P_n(r) do i = 1, nucl_num ! 
function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + tmp_array(i) = cell_function_becke(r, i) ! P_n(r) ! Then you compute the summ the P_n(r) function for each of the "r" points accu += tmp_array(i) enddo accu = 1.d0/accu weight_at_r(l,k,j) = tmp_array(j) * accu - if(isnan(weight_at_r(l,k,j)))then - print*,'isnan(weight_at_r(l,k,j))' - print*,l,k,j - accu = 0.d0 - do i = 1, nucl_num - ! function defined for each atom "i" by equation (13) and (21) with k == 3 - tmp_array(i) = cell_function_becke(r,i) ! P_n(r) - print*,i,tmp_array(i) - ! Then you compute the summ the P_n(r) function for each of the "r" points - accu += tmp_array(i) - enddo - write(*,'(100(F16.10,X))')tmp_array(j) , accu + + if(isnan(weight_at_r(l,k,j))) then + print*,'isnan(weight_at_r(l,k,j))' + print*,l,k,j + accu = 0.d0 + do i = 1, nucl_num + ! function defined for each atom "i" by equation (13) and (21) with k == 3 + tmp_array(i) = cell_function_becke(r,i) ! P_n(r) + print*,i,tmp_array(i) + ! Then you compute the summ the P_n(r) function for each of the "r" points + accu += tmp_array(i) + enddo + write(*,'(100(F16.10,X))')tmp_array(j) , accu stop endif enddo @@ -151,35 +226,76 @@ BEGIN_PROVIDER [double precision, weight_at_r, (n_points_integration_angular,n_p END_PROVIDER +! --- + +BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num)] -BEGIN_PROVIDER [double precision, final_weight_at_r, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] BEGIN_DOC - ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. + ! Total weight on each grid point which takes into account all Lebedev, Voronoi and radial weights. 
END_DOC + implicit none - integer :: i,j,k,l,m - double precision :: r(3) - double precision :: accu,cell_function_becke - double precision :: tmp_array(nucl_num) - double precision :: contrib_integration,x - double precision :: derivative_knowles_function,knowles_function - ! run over all points in space - do j = 1, nucl_num ! that are referred to each atom - do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom - x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] - do k = 1, n_points_integration_angular ! for each angular point attached to the "jth" atom - contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)& - *knowles_function(alpha_knowles(grid_atomic_number(j)),m_knowles,x)**2 - final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral - if(isnan(final_weight_at_r(k,i,j)))then - print*,'isnan(final_weight_at_r(k,i,j))' - print*,k,i,j - write(*,'(100(F16.10,X))')weights_angular_points(k) , weight_at_r(k,i,j) , contrib_integration , dr_radial_integral - stop - endif + integer :: i, j, k, l, m + double precision :: r(3) + double precision :: tmp_array(nucl_num) + double precision :: contrib_integration, x, tmp + double precision, external :: derivative_knowles_function, knowles_function + + final_weight_at_r = 0.d0 + + PROVIDE rad_grid_type + if(rad_grid_type .eq. "KNOWLES") then + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid -1 !for each radial grid attached to the "jth" atom + x = grid_points_radial(i) ! x value for the mapping of the [0, +\infty] to [0,1] + + do k = 1, n_points_integration_angular ! 
for each angular point attached to the "jth" atom + contrib_integration = derivative_knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x) & + * knowles_function(alpha_knowles(grid_atomic_number(j)), m_knowles, x)**2 + + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration * dr_radial_integral + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration + stop + endif + enddo enddo enddo - enddo + + elseif(rad_grid_type .eq. "GILL") then + ! GILL & CHIEN, 2002 + + tmp = 2.d0 * R_gill * R_gill * R_gill * dble(n_points_radial_grid) + + ! run over all points in space + do j = 1, nucl_num ! that are referred to each atom + do i = 1, n_points_radial_grid - 1 !for each radial grid attached to the "jth" atom + contrib_integration = tmp * dble(i-1)**5 / dble(n_points_radial_grid-i+1)**7 + do k = 1, n_points_integration_angular ! 
for each angular point attached to the "jth" atom + final_weight_at_r(k,i,j) = weights_angular_points(k) * weight_at_r(k,i,j) * contrib_integration + + if(isnan(final_weight_at_r(k,i,j))) then + print*,'isnan(final_weight_at_r(k,i,j))' + print*,k,i,j + write(*,'(100(F16.10,X))') weights_angular_points(k), weight_at_r(k,i,j), contrib_integration, dr_radial_integral + stop + endif + enddo + enddo + enddo + + else + + print*, " rad_grid_type = ", rad_grid_type, ' is not implemented' + stop + + endif END_PROVIDER + diff --git a/src/becke_numerical_grid/grid_becke_vector.irp.f b/src/becke_numerical_grid/grid_becke_vector.irp.f index 343bd054..fd185641 100644 --- a/src/becke_numerical_grid/grid_becke_vector.irp.f +++ b/src/becke_numerical_grid/grid_becke_vector.irp.f @@ -21,22 +21,27 @@ BEGIN_PROVIDER [integer, n_points_final_grid] call ezfio_set_becke_numerical_grid_n_points_final_grid(n_points_final_grid) END_PROVIDER - BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] -&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid) ] -&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num) ] - implicit none +! --- + + BEGIN_PROVIDER [double precision, final_grid_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [double precision, final_weight_at_r_vector, (n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points, (3,n_points_final_grid)] +&BEGIN_PROVIDER [integer, index_final_points_reverse, (n_points_integration_angular,n_points_radial_grid,nucl_num)] + BEGIN_DOC -! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point -! -! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions -! -! 
index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point -! -! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices + ! final_grid_points(1:3,j) = (/ x, y, z /) of the jth grid point + ! + ! final_weight_at_r_vector(i) = Total weight function of the ith grid point which contains the Lebedev, Voronoi and radial weights contributions + ! + ! index_final_points(1:3,i) = gives the angular, radial and atomic indices associated to the ith grid point + ! + ! index_final_points_reverse(i,j,k) = index of the grid point having i as angular, j as radial and l as atomic indices END_DOC - integer :: i,j,k,l,i_count - double precision :: r(3) + + implicit none + integer :: i, j, k, l, i_count + double precision :: r(3) + i_count = 0 do j = 1, nucl_num do i = 1, n_points_radial_grid -1 @@ -59,6 +64,8 @@ END_PROVIDER END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, final_grid_points_transp, (n_points_final_grid,3)] implicit none BEGIN_DOC diff --git a/src/becke_numerical_grid/integration_radial.irp.f b/src/becke_numerical_grid/integration_radial.irp.f index 44c83070..3de151ab 100644 --- a/src/becke_numerical_grid/integration_radial.irp.f +++ b/src/becke_numerical_grid/integration_radial.irp.f @@ -1,71 +1,93 @@ - double precision function knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : -! the Log "m" function ( equation (7) in the paper ) - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m -!print*, x - knowles_function = -alpha * dlog(1.d0-x**m) - end - double precision function derivative_knowles_function(alpha,m,x) - implicit none - BEGIN_DOC -! 
Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points - END_DOC - double precision, intent(in) :: alpha,x - integer, intent(in) :: m - double precision :: f - f = x**(m-1) - derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) - end +! --- - BEGIN_PROVIDER [double precision, alpha_knowles, (100)] - implicit none - integer :: i - BEGIN_DOC -! Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) -! as a function of the nuclear charge - END_DOC +double precision function knowles_function(alpha, m, x) - ! H-He - alpha_knowles(1) = 5.d0 - alpha_knowles(2) = 5.d0 + BEGIN_DOC + ! Function proposed by Knowles (JCP, 104, 1996) for distributing the radial points : + ! the Log "m" function ( equation (7) in the paper ) + END_DOC + + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m - ! Li-Be - alpha_knowles(3) = 7.d0 - alpha_knowles(4) = 7.d0 + !print*, x + knowles_function = -alpha * dlog(1.d0-x**m) - ! B-Ne - do i = 5, 10 - alpha_knowles(i) = 5.d0 - enddo + return +end - ! Na-Mg - do i = 11, 12 - alpha_knowles(i) = 7.d0 - enddo +! --- - ! Al-Ar - do i = 13, 18 - alpha_knowles(i) = 5.d0 - enddo +double precision function derivative_knowles_function(alpha, m, x) - ! K-Ca - do i = 19, 20 - alpha_knowles(i) = 7.d0 - enddo + BEGIN_DOC + ! Derivative of the function proposed by Knowles (JCP, 104, 1996) for distributing the radial points + END_DOC - ! Sc-Zn - do i = 21, 30 - alpha_knowles(i) = 5.d0 - enddo + implicit none + double precision, intent(in) :: alpha, x + integer, intent(in) :: m + double precision :: f - ! Ga-Kr - do i = 31, 100 - alpha_knowles(i) = 7.d0 - enddo + f = x**(m-1) + derivative_knowles_function = alpha * dble(m) * f / (1.d0 - x*f) + + return +end + +! --- + +BEGIN_PROVIDER [double precision, alpha_knowles, (100)] + + BEGIN_DOC + ! 
Recommended values for the alpha parameters according to the paper of Knowles (JCP, 104, 1996) + ! as a function of the nuclear charge + END_DOC + + implicit none + integer :: i + + ! H-He + alpha_knowles(1) = 5.d0 + alpha_knowles(2) = 5.d0 + + ! Li-Be + alpha_knowles(3) = 7.d0 + alpha_knowles(4) = 7.d0 + + ! B-Ne + do i = 5, 10 + alpha_knowles(i) = 5.d0 + enddo + + ! Na-Mg + do i = 11, 12 + alpha_knowles(i) = 7.d0 + enddo + + ! Al-Ar + do i = 13, 18 + alpha_knowles(i) = 5.d0 + enddo + + ! K-Ca + do i = 19, 20 + alpha_knowles(i) = 7.d0 + enddo + + ! Sc-Zn + do i = 21, 30 + alpha_knowles(i) = 5.d0 + enddo + + ! Ga-Kr + do i = 31, 100 + alpha_knowles(i) = 7.d0 + enddo + +END_PROVIDER + +! --- - END_PROVIDER diff --git a/src/becke_numerical_grid/step_function_becke.irp.f b/src/becke_numerical_grid/step_function_becke.irp.f index 2905c6c0..6048c35f 100644 --- a/src/becke_numerical_grid/step_function_becke.irp.f +++ b/src/becke_numerical_grid/step_function_becke.irp.f @@ -20,31 +20,42 @@ double precision function f_function_becke(x) f_function_becke = 1.5d0 * x - 0.5d0 * x*x*x end -double precision function cell_function_becke(r,atom_number) - implicit none - double precision, intent(in) :: r(3) - integer, intent(in) :: atom_number +! --- + +double precision function cell_function_becke(r, atom_number) + BEGIN_DOC -! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) + ! atom_number :: atom on which the cell function of Becke (1988, JCP,88(4)) ! 
r(1:3) :: x,y,z coordinantes of the current point END_DOC - double precision :: mu_ij,nu_ij - double precision :: distance_i,distance_j,step_function_becke - integer :: j - distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) + + implicit none + double precision, intent(in) :: r(3) + integer, intent(in) :: atom_number + integer :: j + double precision :: mu_ij, nu_ij + double precision :: distance_i, distance_j, step_function_becke + + distance_i = (r(1) - nucl_coord_transp(1,atom_number) ) * (r(1) - nucl_coord_transp(1,atom_number)) distance_i += (r(2) - nucl_coord_transp(2,atom_number) ) * (r(2) - nucl_coord_transp(2,atom_number)) distance_i += (r(3) - nucl_coord_transp(3,atom_number) ) * (r(3) - nucl_coord_transp(3,atom_number)) - distance_i = dsqrt(distance_i) + distance_i = dsqrt(distance_i) + cell_function_becke = 1.d0 do j = 1, nucl_num - if(j==atom_number)cycle - distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) - distance_j+= (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) - distance_j+= (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) - distance_j = dsqrt(distance_j) - mu_ij = (distance_i - distance_j)*nucl_dist_inv(atom_number,j) + if(j==atom_number) cycle + + distance_j = (r(1) - nucl_coord_transp(1,j) ) * (r(1) - nucl_coord_transp(1,j)) + distance_j += (r(2) - nucl_coord_transp(2,j) ) * (r(2) - nucl_coord_transp(2,j)) + distance_j += (r(3) - nucl_coord_transp(3,j) ) * (r(3) - nucl_coord_transp(3,j)) + distance_j = dsqrt(distance_j) + + mu_ij = (distance_i - distance_j) * nucl_dist_inv(atom_number,j) nu_ij = mu_ij + slater_bragg_type_inter_distance_ua(atom_number,j) * (1.d0 - mu_ij*mu_ij) + cell_function_becke *= step_function_becke(nu_ij) enddo + + return end diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f index 7f89899b..5f2795f1 100644 --- a/src/bi_ort_ints/one_e_bi_ort.irp.f +++ 
b/src/bi_ort_ints/one_e_bi_ort.irp.f @@ -8,19 +8,22 @@ BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)] ao_one_e_integrals_tc_tot = ao_one_e_integrals - provide j1b_type + !provide j1b_type - if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then + !if( (j1b_type .eq. 1) .or. (j1b_type .eq. 2) ) then + ! + ! print *, ' do things properly !' + ! stop - do i = 1, ao_num - do j = 1, ao_num - ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & - + j1b_gauss_hermII (j,i) & - + j1b_gauss_nonherm(j,i) ) - enddo - enddo + ! !do i = 1, ao_num + ! ! do j = 1, ao_num + ! ! ao_one_e_integrals_tc_tot(j,i) += ( j1b_gauss_hermI (j,i) & + ! ! + j1b_gauss_hermII (j,i) & + ! ! + j1b_gauss_nonherm(j,i) ) + ! ! enddo + ! !enddo - endif + !endif END_PROVIDER diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f index 4694a998..0d727785 100644 --- a/src/bi_ort_ints/semi_num_ints_mo.irp.f +++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f @@ -110,27 +110,36 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, print *, ' providing int2_grad1_u12_ao_transp ...' 
call wall_time(wall0) - if(test_cycle_tc)then - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) - enddo - enddo - enddo + if(test_cycle_tc) then + + PROVIDE int2_grad1_u12_ao_test + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3) + enddo + enddo + enddo + else - do ipoint = 1, n_points_final_grid - do i = 1, ao_num - do j = 1, ao_num - int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) - int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) - int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) - enddo - enddo - enddo + + PROVIDE int2_grad1_u12_ao + + do ipoint = 1, n_points_final_grid + do i = 1, ao_num + do j = 1, ao_num + int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1) + int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2) + int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3) + enddo + enddo + enddo + endif + call wall_time(wall1) print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0 @@ -144,9 +153,12 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, integer :: ipoint double precision :: wall0, wall1 - !print *, ' providing int2_grad1_u12_bimo_transp' + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_ao_transp + + !print *, ' providing int2_grad1_u12_bimo_transp' + !call wall_time(wall0) - call wall_time(wall0) !$OMP PARALLEL & !$OMP DEFAULT (NONE) & !$OMP PRIVATE 
(ipoint) & @@ -163,25 +175,31 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, !$OMP END DO !$OMP END PARALLEL - call wall_time(wall1) + !call wall_time(wall1) !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0 END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )] - implicit none - integer :: i, j, ipoint - do ipoint = 1, n_points_final_grid - do i = 1, mo_num - do j = 1, mo_num - int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) - int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) - int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) - enddo - enddo - enddo +BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid, 3, mo_num, mo_num)] + + implicit none + integer :: i, j, ipoint + + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_transp + + do ipoint = 1, n_points_final_grid + do i = 1, mo_num + do j = 1, mo_num + int2_grad1_u12_bimo_t(ipoint,1,j,i) = int2_grad1_u12_bimo_transp(j,i,1,ipoint) + int2_grad1_u12_bimo_t(ipoint,2,j,i) = int2_grad1_u12_bimo_transp(j,i,2,ipoint) + int2_grad1_u12_bimo_t(ipoint,3,j,i) = int2_grad1_u12_bimo_transp(j,i,3,ipoint) + enddo + enddo + enddo + END_PROVIDER ! 
--- diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f index 48fa84f7..e8b56307 100644 --- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f +++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f @@ -81,21 +81,24 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral) integer :: ipoint double precision :: weight + PROVIDE mo_l_coef mo_r_coef + PROVIDE int2_grad1_u12_bimo_t + integral = 0.d0 do ipoint = 1, n_points_final_grid weight = final_weight_at_r_vector(ipoint) - integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & - * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & - + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) ) - integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & - * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & + * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) - integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & - * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & - + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + integral += weight * 
mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) & + * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) & + + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) & + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) ) enddo diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index f5f5959a..721ea0c8 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -20,6 +20,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_vartc_tot, (ao_num, ao_num, ao_num, a enddo END_PROVIDER + ! --- BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_num) ] @@ -40,20 +41,7 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n provide j1b_type - if(j1b_type .eq. 3) then - - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j) - !write(222,*) ao_two_e_tc_tot(k,i,l,j) - enddo - enddo - enddo - enddo - - else + if(j1b_type .eq. 
0) then PROVIDE ao_tc_sym_two_e_pot_in_map @@ -77,6 +65,21 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n enddo enddo + else + + PROVIDE ao_tc_int_chemist + + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j) + !write(222,*) ao_two_e_tc_tot(k,i,l,j) + enddo + enddo + enddo + enddo + endif END_PROVIDER diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f index d51999fc..c69309d1 100644 --- a/src/bi_ortho_mos/mos_rl.irp.f +++ b/src/bi_ortho_mos/mos_rl.irp.f @@ -17,6 +17,8 @@ subroutine ao_to_mo_bi_ortho(A_ao, LDA_ao, A_mo, LDA_mo) double precision, intent(out) :: A_mo(LDA_mo,mo_num) double precision, allocatable :: T(:,:) + PROVIDE mo_l_coef mo_r_coef + allocate ( T(ao_num,mo_num) ) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T @@ -54,6 +56,8 @@ subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao) double precision, intent(out) :: A_ao(LDA_ao,ao_num) double precision, allocatable :: tmp_1(:,:), tmp_2(:,:) + PROVIDE mo_l_coef mo_r_coef + ! ao_overlap x mo_r_coef allocate( tmp_1(ao_num,mo_num) ) call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0 & diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f index d7f45c94..ff5d5c84 100644 --- a/src/bi_ortho_mos/overlap.irp.f +++ b/src/bi_ortho_mos/overlap.irp.f @@ -12,32 +12,27 @@ double precision :: accu_d, accu_nd double precision, allocatable :: tmp(:,:) - ! TODO : re do the DEGEMM +! overlap_bi_ortho = 0.d0 +! do i = 1, mo_num +! do k = 1, mo_num +! do m = 1, ao_num +! do n = 1, ao_num +! overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i) +! enddo +! enddo +! enddo +! enddo - overlap_bi_ortho = 0.d0 - do i = 1, mo_num - do k = 1, mo_num - do m = 1, ao_num - do n = 1, ao_num - overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i) - enddo - enddo - enddo - enddo - -! allocate( tmp(mo_num,ao_num) ) -! -! ! 
tmp <-- L.T x S_ao -! call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & -! , mo_l_coef, size(mo_l_coef, 1), ao_overlap, size(ao_overlap, 1) & -! , 0.d0, tmp, size(tmp, 1) ) -! -! ! S <-- tmp x R -! call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & -! , tmp, size(tmp, 1), mo_r_coef, size(mo_r_coef, 1) & -! , 0.d0, overlap_bi_ortho, size(overlap_bi_ortho, 1) ) -! -! deallocate( tmp ) + allocate( tmp(mo_num,ao_num) ) + ! tmp <-- L.T x S_ao + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + ! S <-- tmp x R + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) & + , 0.d0, overlap_bi_ortho(1,1), size(overlap_bi_ortho, 1) ) + deallocate(tmp) do i = 1, mo_num overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i) @@ -84,20 +79,41 @@ END_PROVIDER END_DOC implicit none - integer :: i, j, p, q + integer :: i, j, p, q + double precision, allocatable :: tmp(:,:) - overlap_mo_r = 0.d0 - overlap_mo_l = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) - overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) - enddo - enddo - enddo - enddo + !overlap_mo_r = 0.d0 + !overlap_mo_l = 0.d0 + !do i = 1, mo_num + ! do j = 1, mo_num + ! do p = 1, ao_num + ! do q = 1, ao_num + ! overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) + ! overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) + ! enddo + ! enddo + ! 
enddo + !enddo + + allocate( tmp(mo_num,ao_num) ) + + tmp = 0.d0 + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_r_coef(1,1), size(mo_r_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) & + , 0.d0, overlap_mo_r(1,1), size(overlap_mo_r, 1) ) + + tmp = 0.d0 + call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0 & + , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) & + , 0.d0, tmp(1,1), size(tmp, 1) ) + call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0 & + , tmp(1,1), size(tmp, 1), mo_l_coef(1,1), size(mo_l_coef, 1) & + , 0.d0, overlap_mo_l(1,1), size(overlap_mo_l, 1) ) + + deallocate(tmp) END_PROVIDER diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f index 13b4dff4..833cc0ea 100644 --- a/src/cipsi_tc_bi_ortho/pt2.irp.f +++ b/src/cipsi_tc_bi_ortho/pt2.irp.f @@ -1,4 +1,4 @@ -subroutine pt2_tc_bi_ortho +subroutine tc_pt2 use selection_types implicit none BEGIN_DOC @@ -15,7 +15,7 @@ subroutine pt2_tc_bi_ortho double precision, external :: memory_of_double double precision :: correlation_energy_ratio,E_denom,E_tc,norm double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:) - PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map + PROVIDE H_apply_buffer_allocated distributed_davidson print*,'Diagonal elements of the Fock matrix ' do i = 1, mo_num @@ -44,24 +44,14 @@ subroutine pt2_tc_bi_ortho pt2_data % overlap= 0.d0 pt2_data % variance = huge(1.e0) - if (s2_eig) then - call make_s2_eigenfunction - endif + !!!! WARNING !!!! SEEMS TO BE PROBLEM WTH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION +! if (s2_eig) then +! call make_s2_eigenfunction +! endif print_pt2 = .False. call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) ! 
call routine_save_right - if (N_det > N_det_max) then - psi_det(1:N_int,1:2,1:N_det) = psi_det_generators(1:N_int,1:2,1:N_det) - psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states) - N_det = N_det_max - soft_touch N_det psi_det psi_coef - if (s2_eig) then - call make_s2_eigenfunction - endif - print_pt2 = .False. - call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) - endif allocate(ept2(1000),pt1(1000),extrap_energy(100)) @@ -71,18 +61,11 @@ subroutine pt2_tc_bi_ortho ! soft_touch thresh_it_dav print_pt2 = .True. - to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor) - to_select = max(N_states_diag, to_select) - - E_denom = E_tc ! TC Energy of the current wave function call pt2_dealloc(pt2_data) call pt2_dealloc(pt2_data_err) call pt2_alloc(pt2_data, N_states) call pt2_alloc(pt2_data_err, N_states) - call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection - - N_iter += 1 - + call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) end diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f index 4c271a4b..77377554 100644 --- a/src/cipsi_tc_bi_ortho/selection.irp.f +++ b/src/cipsi_tc_bi_ortho/selection.irp.f @@ -868,7 +868,6 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d ! and transpose ! ------------------------------------------- -! 
call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii) double precision :: hmono, htwoe, hthree call diag_htilde_mu_mat_fock_bi_ortho(N_int, det, hmono, htwoe, hthree, hii) do istate = 1,N_states @@ -878,8 +877,8 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d psi_h_alpha = 0.d0 alpha_h_psi = 0.d0 do iii = 1, N_det_selectors - call htilde_mu_mat_bi_ortho_tot(psi_selectors(1,1,iii), det, N_int, i_h_alpha) - call htilde_mu_mat_bi_ortho_tot(det, psi_selectors(1,1,iii), N_int, alpha_h_i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_selectors(1,1,iii), det, N_int, i_h_alpha) + call htilde_mu_mat_bi_ortho_tot_slow(det, psi_selectors(1,1,iii), N_int, alpha_h_i) call get_excitation_degree(psi_selectors(1,1,iii), det,degree,N_int) if(degree == 0)then print*,'problem !!!' diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg new file mode 100644 index 00000000..8edeecd0 --- /dev/null +++ b/src/cosgtos_ao_int/EZFIO.cfg @@ -0,0 +1,19 @@ +[ao_expoim_cosgtos] +type: double precision +doc: imag part for Exponents for each primitive of each cosGTOs |AO| +size: (ao_basis.ao_num,ao_basis.ao_prim_num_max) +interface: ezfio, provider + +[use_cosgtos] +type: logical +doc: If true, use cosgtos for AO integrals +interface: ezfio,provider,ocaml +default: False + +[ao_integrals_threshold] +type: Threshold +doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero +interface: ezfio,provider,ocaml +default: 1.e-15 +ezfio_name: threshold_ao + diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED new file mode 100644 index 00000000..932f88a3 --- /dev/null +++ b/src/cosgtos_ao_int/NEED @@ -0,0 +1,2 @@ +ezfio_files +ao_basis diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst new file mode 100644 index 00000000..01f25d6d --- /dev/null +++ b/src/cosgtos_ao_int/README.rst @@ -0,0 +1,4 @@ +============== +cosgtos_ao_int +============== + diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f 
b/src/cosgtos_ao_int/aos_cosgtos.irp.f new file mode 100644 index 00000000..6a4d54fd --- /dev/null +++ b/src/cosgtos_ao_int/aos_cosgtos.irp.f @@ -0,0 +1,210 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_coef_norm_ord_transp_cosgtos(i,j) = ao_coef_norm_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ complex*16, ao_expo_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ] + + implicit none + integer :: i, j + + do j = 1, ao_num + do i = 1, ao_prim_num_max + ao_expo_ord_transp_cosgtos(i,j) = ao_expo_ord_cosgtos(j,i) + enddo + enddo + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, ao_coef_norm_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + + integer :: i, j, powA(3), nz + double precision :: norm + complex*16 :: overlap_x, overlap_y, overlap_z, C_A(3) + complex*16 :: integ1, integ2, expo + + nz = 100 + + C_A(1) = (0.d0, 0.d0) + C_A(2) = (0.d0, 0.d0) + C_A(3) = (0.d0, 0.d0) + + ao_coef_norm_cosgtos = 0.d0 + + do i = 1, ao_num + + powA(1) = ao_power(i,1) + powA(2) = ao_power(i,2) + powA(3) = ao_power(i,3) + + ! Normalization of the primitives + if(primitives_normalized) then + + do j = 1, ao_prim_num(i) + + expo = ao_expo(i,j) + (0.d0, 1.d0) * ao_expoim_cosgtos(i,j) + + call overlap_cgaussian_xyz(C_A, C_A, expo, expo, powA, powA, overlap_x, overlap_y, overlap_z, integ1, nz) + call overlap_cgaussian_xyz(C_A, C_A, conjg(expo), expo, powA, powA, overlap_x, overlap_y, overlap_z, integ2, nz) + + norm = 2.d0 * real( integ1 + integ2 ) + + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) / dsqrt(norm) + enddo + + else + + do j = 1, ao_prim_num(i) + ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) + enddo + + endif + + enddo + +END_PROVIDER + +! 
--- + + BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_cosgtos, (ao_num, ao_prim_num_max) ] +&BEGIN_PROVIDER [ complex*16 , ao_expo_ord_cosgtos, (ao_num, ao_prim_num_max) ] + + implicit none + integer :: i, j + integer :: iorder(ao_prim_num_max) + double precision :: d(ao_prim_num_max,3) + + d = 0.d0 + + do i = 1, ao_num + + do j = 1, ao_prim_num(i) + iorder(j) = j + d(j,1) = ao_expo(i,j) + d(j,2) = ao_coef_norm_cosgtos(i,j) + d(j,3) = ao_expoim_cosgtos(i,j) + enddo + + call dsort (d(1,1), iorder, ao_prim_num(i)) + call dset_order(d(1,2), iorder, ao_prim_num(i)) + call dset_order(d(1,3), iorder, ao_prim_num(i)) + + do j = 1, ao_prim_num(i) + ao_expo_ord_cosgtos (i,j) = d(j,1) + (0.d0, 1.d0) * d(j,3) + ao_coef_norm_ord_cosgtos(i,j) = d(j,2) + enddo + + enddo + +END_PROVIDER + +! --- + + BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, overlap, overlap_x, overlap_y, overlap_z + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap1, overlap_x1, overlap_y1, overlap_z1 + complex*16 :: overlap2, overlap_x2, overlap_y2, overlap_z2 + + ao_overlap_cosgtos = 0.d0 + ao_overlap_cosgtos_x = 0.d0 + ao_overlap_cosgtos_y = 0.d0 + ao_overlap_cosgtos_z = 0.d0 + + dim1 = 100 + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, n, l, c & + !$OMP , overlap_x , overlap_y , overlap_z , overlap & + !$OMP , overlap_x1, overlap_y1, overlap_z1, overlap1 & + !$OMP , overlap_x2, overlap_y2, overlap_z2, overlap2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_overlap_cosgtos_x, 
ao_overlap_cosgtos_y, ao_overlap_cosgtos_z, ao_overlap_cosgtos & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos ) + + do j = 1, ao_num + + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x1, overlap_y1, overlap_z1, overlap1, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, conjg(alpha), beta, power_A, power_B & + , overlap_x2, overlap_y2, overlap_z2, overlap2, dim1 ) + + overlap_x = 2.d0 * real( overlap_x1 + overlap_x2 ) + overlap_y = 2.d0 * real( overlap_y1 + overlap_y2 ) + overlap_z = 2.d0 * real( overlap_z1 + overlap_z2 ) + overlap = 2.d0 * real( overlap1 + overlap2 ) + + ao_overlap_cosgtos(i,j) = ao_overlap_cosgtos(i,j) + c * overlap + + if( isnan(ao_overlap_cosgtos(i,j)) ) then + print*,'i, j', i, j + print*,'l, n', l, n + print*,'c, overlap', c, overlap + print*, overlap_x, overlap_y, overlap_z + stop + endif + + ao_overlap_cosgtos_x(i,j) = ao_overlap_cosgtos_x(i,j) + c * overlap_x + ao_overlap_cosgtos_y(i,j) = ao_overlap_cosgtos_y(i,j) + c * overlap_y + ao_overlap_cosgtos_z(i,j) = ao_overlap_cosgtos_z(i,j) + c * overlap_z + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! 
--- + + + diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f new file mode 100644 index 00000000..d65dfba5 --- /dev/null +++ b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f @@ -0,0 +1,7 @@ +program cosgtos_ao_int + implicit none + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + print *, 'Hello world' +end diff --git a/src/ao_two_e_ints/gauss_legendre.irp.f b/src/cosgtos_ao_int/gauss_legendre.irp.f similarity index 100% rename from src/ao_two_e_ints/gauss_legendre.irp.f rename to src/cosgtos_ao_int/gauss_legendre.irp.f diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f new file mode 100644 index 00000000..7f94f226 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f @@ -0,0 +1,535 @@ + +! --- + +BEGIN_PROVIDER [ double precision, ao_integrals_n_e_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Nucleus-electron interaction, in the cosgtos |AO| basis set. + ! + ! :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle` + ! 
+ END_DOC + + implicit none + integer :: num_A, num_B, power_A(3), power_B(3) + integer :: i, j, k, l, n_pt_in, m + double precision :: c, Z, A_center(3), B_center(3), C_center(3) + complex*16 :: alpha, beta, c1, c2 + + complex*16 :: NAI_pol_mult_cosgtos + + ao_integrals_n_e_cosgtos = 0.d0 + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE ( i, j, k, l, m, alpha, beta, A_center, B_center, C_center & + !$OMP , power_A, power_B, num_A, num_B, Z, c, c1, c2, n_pt_in ) & + !$OMP SHARED ( ao_num, ao_prim_num, ao_nucl, nucl_coord, ao_power, nucl_num, nucl_charge & + !$OMP , ao_expo_ord_transp_cosgtos, ao_coef_norm_ord_transp_cosgtos & + !$OMP , n_pt_max_integrals, ao_integrals_n_e_cosgtos ) + + n_pt_in = n_pt_max_integrals + + !$OMP DO SCHEDULE (dynamic) + + do j = 1, ao_num + num_A = ao_nucl(j) + power_A(1:3) = ao_power(j,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + + do i = 1, ao_num + num_B = ao_nucl(i) + power_B(1:3) = ao_power(i,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + do l = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(l,j) + + do m = 1, ao_prim_num(i) + beta = ao_expo_ord_transp_cosgtos(m,i) + + c = 0.d0 + do k = 1, nucl_num + + Z = nucl_charge(k) + + C_center(1:3) = nucl_coord(k,1:3) + + !print *, ' ' + !print *, A_center, B_center, C_center, power_A, power_B + !print *, real(alpha), real(beta) + + c1 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , alpha, beta, C_center, n_pt_in ) + + !c2 = c1 + c2 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B & + , conjg(alpha), beta, C_center, n_pt_in ) + + !print *, ' c1 = ', real(c1) + !print *, ' c2 = ', real(c2) + + c = c - Z * 2.d0 * real(c1 + c2) + + enddo + ao_integrals_n_e_cosgtos(i,j) = ao_integrals_n_e_cosgtos(i,j) & + + ao_coef_norm_ord_transp_cosgtos(l,j) & + * ao_coef_norm_ord_transp_cosgtos(m,i) * c + enddo + enddo + enddo + enddo + + !$OMP END DO + !$OMP END PARALLEL + +END_PROVIDER + +! 
--- + +complex*16 function NAI_pol_mult_cosgtos(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in) + + BEGIN_DOC + ! + ! Computes the electron-nucleus attraction with two primitves cosgtos. + ! + ! :math:`\langle g_i | \frac{1}{|r-R_c|} | g_j \rangle` + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: C_center(3), A_center(3), B_center(3) + complex*16, intent(in) :: alpha, beta + + integer :: i, n_pt, n_pt_out + double precision :: dist, const_mod + complex*16 :: p, p_inv, rho, dist_integral, const, const_factor, coeff, factor + complex*16 :: accu, P_center(3) + complex*16 :: d(0:n_pt_in) + + complex*16 :: V_n_e_cosgtos + complex*16 :: crint + + if ( (A_center(1)/=B_center(1)) .or. (A_center(2)/=B_center(2)) .or. (A_center(3)/=B_center(3)) .or. & + (A_center(1)/=C_center(1)) .or. (A_center(2)/=C_center(2)) .or. (A_center(3)/=C_center(3)) ) then + + continue + + else + + NAI_pol_mult_cosgtos = V_n_e_cosgtos( power_A(1), power_A(2), power_A(3) & + , power_B(1), power_B(2), power_B(3) & + , alpha, beta ) + return + + endif + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + rho = alpha * beta * p_inv + + dist = 0.d0 + dist_integral = (0.d0, 0.d0) + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i)) + enddo + + const_factor = dist * rho + const = p * dist_integral + + const_mod = dsqrt(real(const_factor)*real(const_factor) + aimag(const_factor)*aimag(const_factor)) + if(const_mod > 80.d0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + factor = zexp(-const_factor) + coeff = dtwo_pi * factor * p_inv + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt = 2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) ) + 
if(n_pt == 0) then + NAI_pol_mult_cosgtos = coeff * crint(0, const) + return + endif + + call give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + if(n_pt_out < 0) then + NAI_pol_mult_cosgtos = (0.d0, 0.d0) + return + endif + + accu = (0.d0, 0.d0) + do i = 0, n_pt_out, 2 + accu += crint(shiftr(i, 1), const) * d(i) + +! print *, shiftr(i, 1), real(const), real(d(i)), real(crint(shiftr(i, 1), const)) + enddo + NAI_pol_mult_cosgtos = accu * coeff + +end function NAI_pol_mult_cosgtos + +! --- + +subroutine give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta & + , power_A, power_B, C_center, n_pt_in, d, n_pt_out) + + BEGIN_DOC + ! Returns the explicit polynomial in terms of the "t" variable of the following + ! + ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. + END_DOC + + implicit none + + integer, intent(in) :: n_pt_in, power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), C_center(3) + complex*16, intent(in) :: alpha, beta + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:n_pt_in) + + integer :: a_x, b_x, a_y, b_y, a_z, b_z + integer :: n_pt1, n_pt2, n_pt3, dim, i, n_pt_tmp + complex*16 :: p, P_center(3), rho, p_inv, p_inv_2 + complex*16 :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2) + complex*16 :: d1(0:n_pt_in), d2(0:n_pt_in), d3(0:n_pt_in) + + ASSERT (n_pt_in > 1) + + p = alpha + beta + p_inv = (1.d0, 0.d0) / p + p_inv_2 = 0.5d0 * p_inv + + do i = 1, 3 + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + enddo + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + d1(i) = (0.d0, 0.d0) + d2(i) = (0.d0, 0.d0) + d3(i) = (0.d0, 0.d0) + enddo + + ! 
--- + + n_pt1 = n_pt_in + + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(1) - C_center(1)) + + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(1) - C_center(1)) + + R2x(0) = p_inv_2 + R2x(1) = (0.d0, 0.d0) + R2x(2) = -p_inv_2 + + a_x = power_A(1) + b_x = power_B(1) + call I_x1_pol_mult_one_e_cosgtos(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in) + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt2 = n_pt_in + + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(2) - C_center(2)) + + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(2) - C_center(2)) + + a_y = power_A(2) + b_y = power_B(2) + call I_x1_pol_mult_one_e_cosgtos(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in) + + if(n_pt2 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt3 = n_pt_in + + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = (0.d0, 0.d0) + R1x(2) = -(P_center(3) - C_center(3)) + + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = (0.d0, 0.d0) + R1xp(2) = -(P_center(3) - C_center(3)) + + a_z = power_A(3) + b_z = power_B(3) + call I_x1_pol_mult_one_e_cosgtos(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in) + + if(n_pt3 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + + ! --- + + n_pt_tmp = 0 + call multiply_cpoly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp) + do i = 0, n_pt_tmp + d1(i) = (0.d0, 0.d0) + enddo + + n_pt_out = 0 + call multiply_cpoly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out) + do i = 0, n_pt_out + d(i) = d1(i) + enddo + +end subroutine give_cpolynomial_mult_center_one_e + +! --- + +recursive subroutine I_x1_pol_mult_one_e_cosgtos(a, c, R1x, R1xp, R2x, d, nd, n_pt_in) + + BEGIN_DOC + ! 
Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt_in + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:n_pt_in) + + integer :: nx, ix, dim, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + dim = n_pt_in + + if( (a==0) .and. (c==0)) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif( (c < 0) .or. (nd < 0) ) then + + nd = -1 + return + + elseif((a == 0) .and. (c .ne. 0)) then + + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, n_pt_in) + + elseif(a == 1) then + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x2_pol_mult_one_e_cosgtos(c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + else + + nx = 0 + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-2, c, R1x, R1xp, R2x, X, nx, n_pt_in) + + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + nx = nd + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(a-1, c-1, R1x, R1xp, R2x, X, nx, n_pt_in) + do ix = 0, nx + X(ix) *= dble(c) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + call I_x1_pol_mult_one_e_cosgtos(a-1, c, R1x, R1xp, R2x, Y, ny, n_pt_in) + call multiply_cpoly(Y, ny, R1x, 2, d, nd) + + endif + +end subroutine I_x1_pol_mult_one_e_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, dim) + + BEGIN_DOC + ! 
Recursive routine involved in the electron-nucleus potential + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: R1x(0:2), R1xp(0:2), R2x(0:2) + integer, intent(inout) :: nd + complex*16, intent(out) :: d(0:max_dim) + + integer :: i, nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + if(c == 0) then + + nd = 0 + d(0) = (1.d0, 0.d0) + return + + elseif((nd < 0) .or. (c < 0)) then + + nd = -1 + return + + else + + nx = 0 + do ix = 0, dim + X(ix) = (0.d0, 0.d0) + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-2, R1x, R1xp, R2x, X, nx, dim) + + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + call multiply_cpoly(X, nx, R2x, 2, d, nd) + + ny = 0 + do ix = 0, dim + Y(ix) = (0.d0, 0.d0) + enddo + + call I_x1_pol_mult_one_e_cosgtos(0, c-1, R1x, R1xp, R2x, Y, ny, dim) + + if(ny .ge. 0) then + call multiply_cpoly(Y, ny, R1xp, 2, d, nd) + endif + + endif + +end subroutine I_x2_pol_mult_one_e_cosgtos + +! --- + +complex*16 function V_n_e_cosgtos(a_x, a_y, a_z, b_x, b_y, b_z, alpha, beta) + + BEGIN_DOC + ! Primitve nuclear attraction between the two primitves centered on the same atom. + ! + ! $p_1 = x^{a_x} y^{a_y} z^{a_z} \exp(-\alpha r^2)$ + ! + ! $p_2 = x^{b_x} y^{b_y} z^{b_z} \exp(-\beta r^2)$ + END_DOC + + implicit none + + integer, intent(in) :: a_x, a_y, a_z, b_x, b_y, b_z + complex*16, intent(in) :: alpha, beta + + double precision :: V_phi, V_theta + complex*16 :: V_r_cosgtos + + if( (iand(a_x + b_x, 1) == 1) .or. & + (iand(a_y + b_y, 1) == 1) .or. & + (iand(a_z + b_z, 1) == 1) ) then + + V_n_e_cosgtos = (0.d0, 0.d0) + + else + + V_n_e_cosgtos = V_r_cosgtos(a_x + b_x + a_y + b_y + a_z + b_z + 1, alpha + beta) & + * V_phi(a_x + b_x, a_y + b_y) & + * V_theta(a_z + b_z, a_x + b_x + a_y + b_y + 1) + endif + +end function V_n_e_cosgtos + +! --- + +complex*16 function V_r_cosgtos(n, alpha) + + BEGIN_DOC + ! 
Computes the radial part of the nuclear attraction integral: + ! + ! $\int_{0}^{\infty} r^n \exp(-\alpha r^2) dr$ + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer , intent(in) :: n + complex*16, intent(in) :: alpha + + double precision :: fact + + if(iand(n, 1) .eq. 1) then + V_r_cosgtos = 0.5d0 * fact(shiftr(n, 1)) / (alpha**(shiftr(n, 1) + 1)) + else + V_r_cosgtos = sqpi * fact(n) / fact(shiftr(n, 1)) * (0.5d0/zsqrt(alpha))**(n+1) + endif + +end function V_r_cosgtos + +! --- + diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f new file mode 100644 index 00000000..710b04d4 --- /dev/null +++ b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f @@ -0,0 +1,223 @@ + +! --- + + BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_x, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_y, (ao_num, ao_num) ] +&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_z, (ao_num, ao_num) ] + + implicit none + integer :: i, j, n, l, dim1, power_A(3), power_B(3) + double precision :: c, deriv_tmp + complex*16 :: alpha, beta, A_center(3), B_center(3) + complex*16 :: overlap_x, overlap_y, overlap_z, overlap + complex*16 :: overlap_x0_1, overlap_y0_1, overlap_z0_1 + complex*16 :: overlap_x0_2, overlap_y0_2, overlap_z0_2 + complex*16 :: overlap_m2_1, overlap_p2_1 + complex*16 :: overlap_m2_2, overlap_p2_2 + complex*16 :: deriv_tmp_1, deriv_tmp_2 + + + dim1 = 100 + + ! -- Dummy call to provide everything + + A_center(:) = (0.0d0, 0.d0) + B_center(:) = (1.0d0, 0.d0) + alpha = (1.0d0, 0.d0) + beta = (0.1d0, 0.d0) + power_A = 1 + power_B = 0 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + ! 
--- + + !$OMP PARALLEL DO SCHEDULE(GUIDED) & + !$OMP DEFAULT(NONE) & + !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, l, n, c & + !$OMP , deriv_tmp, deriv_tmp_1, deriv_tmp_2 & + !$OMP , overlap_x, overlap_y, overlap_z, overlap & + !$OMP , overlap_m2_1, overlap_p2_1, overlap_m2_2, overlap_p2_2 & + !$OMP , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap_x0_2, overlap_y0_2, overlap_z0_2 ) & + !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1 & + !$OMP , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos & + !$OMP , ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z ) + + do j = 1, ao_num + A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) + A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0) + A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0) + power_A(1) = ao_power(j,1) + power_A(2) = ao_power(j,2) + power_A(3) = ao_power(j,3) + + do i = 1, ao_num + B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) + B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0) + B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0) + power_B(1) = ao_power(i,1) + power_B(2) = ao_power(i,2) + power_B(3) = ao_power(i,3) + + ao_deriv2_cosgtos_x(i,j) = 0.d0 + ao_deriv2_cosgtos_y(i,j) = 0.d0 + ao_deriv2_cosgtos_z(i,j) = 0.d0 + + do n = 1, ao_prim_num(j) + alpha = ao_expo_ord_transp_cosgtos(n,j) + + do l = 1, ao_prim_num(i) + c = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i) + beta = ao_expo_ord_transp_cosgtos(l,i) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x0_2, overlap_y0_2, overlap_z0_2, overlap, dim1 ) + + ! 
--- + + power_A(1) = power_A(1) - 2 + if(power_A(1) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_m2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_m2_2, overlap_y, overlap_z, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(1) = power_A(1) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_p2_1, overlap_y, overlap_z, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_p2_2, overlap_y, overlap_z, overlap, dim1 ) + + power_A(1) = power_A(1) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_1 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_y0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_2 & + + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_y0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_x(i,j) += c * deriv_tmp + + ! 
--- + + power_A(2) = power_A(2) - 2 + if(power_A(2) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_m2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_m2_2, overlap_y, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(2) = power_A(2) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_p2_1, overlap_y, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_p2_2, overlap_y, overlap, dim1 ) + + power_A(2) = power_A(2) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_1 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_z0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_2 & + + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_z0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_y(i,j) += c * deriv_tmp + + ! 
--- + + power_A(3) = power_A(3) - 2 + if(power_A(3) > -1) then + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_m2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_m2_2, overlap, dim1 ) + else + overlap_m2_1 = (0.d0, 0.d0) + overlap_m2_2 = (0.d0, 0.d0) + endif + + power_A(3) = power_A(3) + 4 + call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_p2_1, overlap, dim1 ) + + call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B & + , overlap_x, overlap_y, overlap_p2_2, overlap, dim1 ) + + power_A(3) = power_A(3) - 2 + + deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_1 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_1 & + + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_y0_1 + + deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_2 & + + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_2 & + + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_y0_2 + + deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2) + + ao_deriv2_cosgtos_z(i,j) += c * deriv_tmp + + ! --- + + enddo + enddo + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, ao_kinetic_integrals_cosgtos, (ao_num, ao_num)] + + BEGIN_DOC + ! + ! Kinetic energy integrals in the cosgtos |AO| basis. + ! + ! $\langle \chi_i |\hat{T}| \chi_j \rangle$ + ! 
+ END_DOC + + implicit none + integer :: i, j + + !$OMP PARALLEL DO DEFAULT(NONE) & + !$OMP PRIVATE(i, j) & + !$OMP SHARED(ao_num, ao_kinetic_integrals_cosgtos, ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z) + do j = 1, ao_num + do i = 1, ao_num + ao_kinetic_integrals_cosgtos(i,j) = -0.5d0 * ( ao_deriv2_cosgtos_x(i,j) & + + ao_deriv2_cosgtos_y(i,j) & + + ao_deriv2_cosgtos_z(i,j) ) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f new file mode 100644 index 00000000..527a98d5 --- /dev/null +++ b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f @@ -0,0 +1,1584 @@ + +! --- + +double precision function ao_two_e_integral_cosgtos(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision :: ao_two_e_integral_cosgtos_schwartz_accel + 
complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then + + !print *, ' with shwartz acc ' + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + else + !print *, ' without shwartz acc ' + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + ao_two_e_integral_cosgtos = 0.d0 + + if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then + !print *, ' not the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + !integer :: ii + !do ii = 1, 3 + ! print *, 'fact_p1', fact_p1 + ! 
print *, 'fact_p2', fact_p2 + ! print *, 'fact_p3', fact_p3 + ! print *, 'fact_p4', fact_p4 + ! !print *, pp1, p1_inv + ! !print *, pp2, p2_inv + ! !print *, pp3, p3_inv + ! !print *, pp4, p4_inv + !enddo + ! if( abs(aimag(P1_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_1 is complex !!' + ! print *, P1_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P2_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_2 is complex !!' + ! print *, P2_center + ! print *, ' old expos:' + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! print *, ' new expo:' + ! print *, pp2, p2_inv + ! print *, ' factor:' + ! print *, fact_p2 + ! print *, ' old centers:' + ! print *, I_center, J_center + ! print *, ' powers:' + ! print *, I_power, J_power + ! stop + ! endif + ! if( abs(aimag(P3_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_3 is complex !!' + ! print *, P3_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + ! if( abs(aimag(P4_center(ii))) .gt. 0.d0 ) then + ! print *, ' P_4 is complex !!' + ! print *, P4_center + ! print *, expo1, expo2 + ! print *, conjg(expo1), conjg(expo2) + ! stop + ! endif + !enddo + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + !do ii = 1, 3 + ! !print *, qq1, q1_inv + ! !print *, qq2, q2_inv + ! print *, 'fact_q1', fact_q1 + ! print *, 'fact_q2', fact_q2 + !enddo + ! 
if( abs(aimag(Q1_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_1 is complex !!' + ! print *, Q1_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + ! if( abs(aimag(Q2_center(ii))) .gt. 0.d0 ) then + ! print *, ' Q_2 is complex !!' + ! print *, Q2_center + ! print *, expo3, expo4 + ! print *, conjg(expo3), conjg(expo4) + ! stop + ! endif + !enddo + + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + !integral_tot = integral1 + !print*, integral_tot + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + 
enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + !print *, ' the same center' + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + do r = 1, ao_prim_num(k) + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), 
K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + endif + +end function ao_two_e_integral_cosgtos + +! --- + +double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l) + + BEGIN_DOC + ! integral of the AO basis or (ij|kl) + ! 
i(r1) j(r1) 1/r12 k(r2) l(r2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: i, j, k, l + + integer :: p, q, r, s + integer :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3) + integer :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3) + double precision :: coef1, coef2, coef3, coef4 + complex*16 :: I_center(3), J_center(3), K_center(3), L_center(3) + complex*16 :: expo1, expo2, expo3, expo4 + complex*16 :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv + complex*16 :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv + complex*16 :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv + complex*16 :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv + complex*16 :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv + complex*16 :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv + complex*16 :: integral1, integral2, integral3, integral4 + complex*16 :: integral5, integral6, integral7, integral8 + complex*16 :: integral_tot + + double precision, allocatable :: schwartz_kl(:,:) + double precision :: thr + double precision :: schwartz_ij + + complex*16 :: ERI_cosgtos + complex*16 :: general_primitive_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz_accel = 0.d0 + + dim1 = n_pt_max_integrals + + num_i = ao_nucl(i) + num_j = ao_nucl(j) + num_k = ao_nucl(k) + num_l = ao_nucl(l) + + + thr = ao_integrals_threshold*ao_integrals_threshold + + allocate( schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)) ) + + if(num_i /= num_j .or. num_k /= num_l .or. 
num_j /= num_k) then + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) + J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0) + K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0) + L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, K_power, L_power, K_center, L_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, K_power, L_power, K_center, L_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), K_power, L_power, K_center, L_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + 
integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1 & + , expo1, expo2, I_power, J_power, I_center, J_center, dim1 ) + p1_inv = (1.d0,0.d0) / pp1 + + call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2 & + , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 ) + p2_inv = (1.d0,0.d0) / pp2 + + call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3 & + , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p3_inv = (1.d0,0.d0) / pp3 + + call give_explicit_cpoly_and_cgaussian( 
P4_new, P4_center, pp4, fact_p4, iorder_p4 & + , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 ) + p4_inv = (1.d0,0.d0) / pp4 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * 
ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1 & + , expo3, expo4, K_power, L_power, K_center, L_center, dim1 ) + q1_inv = (1.d0,0.d0) / qq1 + + call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2 & + , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 ) + q2_inv = (1.d0,0.d0) / qq2 + + integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + + integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 ) + + integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 & + , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + 
+ coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + else + + do p = 1, 3 + I_power(p) = ao_power(i,p) + J_power(p) = ao_power(j,p) + K_power(p) = ao_power(k,p) + L_power(p) = ao_power(l,p) + enddo + + schwartz_kl(0,0) = 0.d0 + do r = 1, ao_prim_num(k) + coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k) + expo1 = ao_expo_ord_transp_cosgtos(r,k) + + schwartz_kl(0,r) = 0.d0 + do s = 1, ao_prim_num(l) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l) + expo2 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), 
conjg(expo2), expo1, expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , K_power(1), L_power(1), K_power(1), L_power(1) & + , K_power(2), L_power(2), K_power(2), L_power(2) & + , K_power(3), L_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + + schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot) + + schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r)) + enddo + schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0)) + enddo + + do p = 1, ao_prim_num(i) + coef1 = ao_coef_norm_ord_transp_cosgtos(p,i) + expo1 = ao_expo_ord_transp_cosgtos(p,i) + + do q = 1, ao_prim_num(j) + coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j) + expo2 = ao_expo_ord_transp_cosgtos(q,j) + + integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , 
I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 & + , I_power(1), J_power(1), I_power(1), J_power(1) & + , I_power(2), J_power(2), I_power(2), J_power(2) & + , I_power(3), J_power(3), I_power(3), J_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot) + + if(schwartz_kl(0,0)*schwartz_ij < thr) cycle + do r = 1, ao_prim_num(k) + if(schwartz_kl(0,r)*schwartz_ij < thr) cycle + + coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k) + expo3 = ao_expo_ord_transp_cosgtos(r,k) + + do s = 1, ao_prim_num(l) + if(schwartz_kl(s,r)*schwartz_ij < thr) cycle + + coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l) + expo4 = ao_expo_ord_transp_cosgtos(s,l) + + integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), 
J_power(3), K_power(3), L_power(3) ) + + integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 & + , I_power(1), J_power(1), K_power(1), L_power(1) & + , I_power(2), J_power(2), K_power(2), L_power(2) & + , I_power(3), J_power(3), K_power(3), L_power(3) ) + + integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8 + + ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel & + + coef4 * 2.d0 * real(integral_tot) + enddo ! s + enddo ! r + enddo ! q + enddo ! p + + endif + + deallocate(schwartz_kl) + +end function ao_two_e_integral_cosgtos_schwartz_accel + +! --- + +BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num) ] + + BEGIN_DOC + ! 
Needed to compute Schwartz inequalities + END_DOC + + implicit none + integer :: i, k + double precision :: ao_two_e_integral_cosgtos + + ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1) + + !$OMP PARALLEL DO PRIVATE(i,k) & + !$OMP DEFAULT(NONE) & + !$OMP SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) & + !$OMP SCHEDULE(dynamic) + do i = 1, ao_num + do k = 1, i + ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k)) + ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k) + enddo + enddo + !$OMP END PARALLEL DO + +END_PROVIDER + +! --- + +complex*16 function general_primitive_integral_cosgtos( dim, P_new, P_center, fact_p, p, p_inv, iorder_p & + , Q_new, Q_center, fact_q, q, q_inv, iorder_q ) + + BEGIN_DOC + ! + ! Computes the integral where p,q,r,s are cos-cGTOS primitives + ! + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: iorder_p(3), iorder_q(3) + complex*16, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv + complex*16, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv + + integer :: i, j, nx, ny, nz, n_Ix, n_Iy, n_Iz, iorder, n_pt_tmp, n_pt_out + double precision :: tmp_mod + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: pq, pq_inv, pq_inv_2, p01_1, p01_2, p10_1, p10_2, ppq, sq_ppq + complex*16 :: rho, dist, const + complex*16 :: accu, tmp_p, tmp_q + complex*16 :: dx(0:max_dim), Ix_pol(0:max_dim), dy(0:max_dim), Iy_pol(0:max_dim), dz(0:max_dim), Iz_pol(0:max_dim) + complex*16 :: d1(0:max_dim), d_poly(0:max_dim) + + complex*16 :: crint_sum + + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly + + general_primitive_integral_cosgtos = (0.d0, 0.d0) + + pq = (0.5d0, 0.d0) * p_inv * q_inv + pq_inv = (0.5d0, 0.d0) / (p + q) + pq_inv_2 = pq_inv + pq_inv + p10_1 
= q * pq ! 1/(2p) + p01_1 = p * pq ! 1/(2q) + p10_2 = pq_inv_2 * p10_1 * q ! 0.5d0*q/(pq + p*p) + p01_2 = pq_inv_2 * p01_1 * p ! 0.5d0*p/(q*q + pq) + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + ! --- + + iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1) + + do i = 0, iorder + Ix_pol(i) = (0.d0, 0.d0) + enddo + + n_Ix = 0 + do i = 0, iorder_p(1) + + tmp_p = P_new(i,1) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(1) + + tmp_q = tmp_p * Q_new(j,1) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(1), Q_center(1), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dx, nx, tmp_q, Ix_pol, n_Ix) + enddo + enddo + if(n_Ix == -1) then + return + endif + + ! --- + + iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2) + + do i = 0, iorder + Iy_pol(i) = (0.d0, 0.d0) + enddo + + n_Iy = 0 + do i = 0, iorder_p(2) + + tmp_p = P_new(i,2) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(2) + + tmp_q = tmp_p * Q_new(j,2) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(2), Q_center(2), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dy, ny, tmp_q, Iy_pol, n_Iy) + enddo + enddo + + if(n_Iy == -1) then + return + endif + + ! 
--- + + iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3) + + do i = 0, iorder + Iz_pol(i) = (0.d0, 0.d0) + enddo + + n_Iz = 0 + do i = 0, iorder_p(3) + + tmp_p = P_new(i,3) + tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p)) + if(tmp_mod < thresh) cycle + + do j = 0, iorder_q(3) + + tmp_q = tmp_p * Q_new(j,3) + tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q)) + if(tmp_mod < thresh) cycle + + !DIR$ FORCEINLINE + call give_cpolynom_mult_center_x(P_center(3), Q_center(3), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz) + !DIR$ FORCEINLINE + call add_cpoly_multiply(dz, nz, tmp_q, Iz_pol, n_Iz) + enddo + enddo + + if(n_Iz == -1) then + return + endif + + ! --- + + rho = p * q * pq_inv_2 + dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) & + + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) & + + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3)) + const = dist * rho + + n_pt_tmp = n_Ix + n_Iy + do i = 0, n_pt_tmp + d_poly(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp) + if(n_pt_tmp == -1) then + return + endif + n_pt_out = n_pt_tmp + n_Iz + do i = 0, n_pt_out + d1(i) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out) + + accu = crint_sum(n_pt_out, const, d1) +! print *, n_pt_out, real(d1(0:n_pt_out)) +! print *, real(accu) + + general_primitive_integral_cosgtos = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / sq_ppq + +end function general_primitive_integral_cosgtos + +! --- + +complex*16 function ERI_cosgtos(alpha, beta, delta, gama, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z) + + BEGIN_DOC + ! ATOMIC PRIMTIVE two-electron integral between the 4 primitives :: + ! primitive_1 = x1**(a_x) y1**(a_y) z1**(a_z) exp(-alpha * r1**2) + ! primitive_2 = x1**(b_x) y1**(b_y) z1**(b_z) exp(- beta * r1**2) + ! 
primitive_3 = x2**(c_x) y2**(c_y) z2**(c_z) exp(-delta * r2**2) + ! primitive_4 = x2**(d_x) y2**(d_y) z2**(d_z) exp(- gama * r2**2) + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(in) :: delta, gama, alpha, beta + + integer :: a_x_2, b_x_2, c_x_2, d_x_2, a_y_2, b_y_2, c_y_2, d_y_2, a_z_2, b_z_2, c_z_2, d_z_2 + integer :: i, j, k, l, n_pt + integer :: nx, ny, nz + double precision :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im + complex*16 :: p, q, ppq, sq_ppq, coeff, I_f + + ERI_cosgtos = (0.d0, 0.d0) + + ASSERT (REAL(alpha) >= 0.d0) + ASSERT (REAL(beta ) >= 0.d0) + ASSERT (REAL(delta) >= 0.d0) + ASSERT (REAL(gama ) >= 0.d0) + + nx = a_x + b_x + c_x + d_x + if(iand(nx,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + ny = a_y + b_y + c_y + d_y + if(iand(ny,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + nz = a_z + b_z + c_z + d_z + if(iand(nz,1) == 1) then + ERI_cosgtos = (0.d0, 0.d0) + return + endif + + n_pt = shiftl(nx+ny+nz, 1) + + p = alpha + beta + q = delta + gama + + ! get \sqrt(p + q) + !ppq = p + q + !ppq_re = REAL (ppq) + !ppq_im = AIMAG(ppq) + !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im) + !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod) + !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re + !sq_ppq = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im + sq_ppq = zsqrt(p + q) + + coeff = pi_5_2 / (p * q * sq_ppq) + if(n_pt == 0) then + ERI_cosgtos = coeff + return + endif + + call integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + ERI_cosgtos = I_f * coeff + +end function ERI_cosgtos + +! --- + +subroutine integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt) + + BEGIN_DOC + ! Calculates the integral of the polynomial : + ! + ! 
$I_{x_1}(a_x+b_x, c_x+d_x, p, q) \, I_{x_1}(a_y+b_y, c_y+d_y, p, q) \, I_{x_1}(a_z+b_z, c_z+d_z, p, q)$ + ! in $( 0 ; 1)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt + integer, intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z + complex*16, intent(out) :: I_f + + integer :: i, j, ix, iy, iz, jx, jy, jz, sx, sy, sz + complex*16 :: p, q + complex*16 :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2 + complex*16 :: B00(n_pt_max_integrals), B10(n_pt_max_integrals), B01(n_pt_max_integrals) + complex*16 :: t1(n_pt_max_integrals), t2(n_pt_max_integrals) + + + ASSERT (n_pt > 1) + + j = shiftr(n_pt, 1) + + pq_inv = (0.5d0, 0.d0) / (p + q) + p10_1 = (0.5d0, 0.d0) / p + p01_1 = (0.5d0, 0.d0) / q + p10_2 = (0.5d0, 0.d0) * q /(p * q + p * p) + p01_2 = (0.5d0, 0.d0) * p /(q * q + q * p) + pq_inv_2 = pq_inv + pq_inv + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: t1, t2, B10, B01, B00 + ix = a_x + b_x + jx = c_x + d_x + iy = a_y + b_y + jy = c_y + d_y + iz = a_z + b_z + jz = c_z + d_z + sx = ix + jx + sy = iy + jy + sz = iz + jz + + do i = 1, n_pt + B10(i) = p10_1 - gauleg_t2(i, j) * p10_2 + B01(i) = p01_1 - gauleg_t2(i, j) * p01_2 + B00(i) = gauleg_t2(i, j) * pq_inv + enddo + + if(sx > 0) then + call I_x1_new_cosgtos(ix, jx, B10, B01, B00, t1, n_pt) + else + do i = 1, n_pt + t1(i) = (1.d0, 0.d0) + enddo + endif + + if(sy > 0) then + call I_x1_new_cosgtos(iy, jy, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + if(sz > 0) then + call I_x1_new_cosgtos(iz, jz, B10, B01, B00, t2, n_pt) + do i = 1, n_pt + t1(i) = t1(i) * t2(i) + enddo + endif + + I_f = (0.d0, 0.d0) + do i = 1, n_pt + I_f += gauleg_w(i, j) * t1(i) + enddo + +end subroutine integrale_new_cosgtos + +! --- + +recursive subroutine I_x1_new_cosgtos(a, c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! 
recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: a, c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + complex*16 :: res2(n_pt_max_integrals) + + if(c < 0) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + else if (a == 0) then + + call I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + else if (a == 1) then + + call I_x2_new_cosgtos(c-1, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c) * B_00(i) * res(i) + enddo + + else + + call I_x1_new_cosgtos(a-2, c , B_10, B_01, B_00, res , n_pt) + call I_x1_new_cosgtos(a-1, c-1, B_10, B_01, B_00, res2, n_pt) + do i = 1, n_pt + res(i) = dble(a-1) * B_10(i) * res(i) + dble(c) * B_00(i) * res2(i) + enddo + + endif + +end subroutine I_x1_new_cosgtos + +! --- + +recursive subroutine I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt) + + BEGIN_DOC + ! recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: c, n_pt + complex*16, intent(in) :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals) + complex*16, intent(out) :: res(n_pt_max_integrals) + + integer :: i + + if(c == 1) then + + do i = 1, n_pt + res(i) = (0.d0, 0.d0) + enddo + + elseif(c == 0) then + + do i = 1, n_pt + res(i) = (1.d0, 0.d0) + enddo + + else + + call I_x1_new_cosgtos(0, c-2, B_10, B_01, B_00, res, n_pt) + do i = 1, n_pt + res(i) = dble(c-1) * B_01(i) * res(i) + enddo + + endif + +end subroutine I_x2_new_cosgtos + +! --- + +subroutine give_cpolynom_mult_center_x( P_center, Q_center, a_x, d_x, p, q, n_pt_in & + , pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, d, n_pt_out) + + BEGIN_DOC + ! subroutine that returns the explicit polynom in term of the "t" + ! 
variable of the following polynoms : + ! + ! $I_{x_1}(a_x,d_x,p,q) \, I_{x_1}(a_y,d_y,p,q) \ I_{x_1}(a_z,d_z,p,q)$ + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a_x, d_x + complex*16, intent(in) :: P_center, Q_center, p, q, pq_inv, p10_1, p01_1, p10_2, p01_2, pq_inv_2 + integer, intent(out) :: n_pt_out + complex*16, intent(out) :: d(0:max_dim) + + integer :: n_pt1, i + complex*16 :: B10(0:2), B01(0:2), B00(0:2), C00(0:2), D00(0:2) + + ASSERT (n_pt_in >= 0) + + B10(0) = p10_1 + B10(1) = (0.d0, 0.d0) + B10(2) = -p10_2 + + B01(0) = p01_1 + B01(1) = (0.d0, 0.d0) + B01(2) = -p01_2 + + B00(0) = (0.d0, 0.d0) + B00(1) = (0.d0, 0.d0) + B00(2) = pq_inv + + C00(0) = (0.d0, 0.d0) + C00(1) = (0.d0, 0.d0) + C00(2) = -q * (P_center - Q_center) * pq_inv_2 + + D00(0) = (0.d0, 0.d0) + D00(1) = (0.d0, 0.d0) + D00(2) = -p * (Q_center - P_center) * pq_inv_2 + + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + + n_pt1 = n_pt_in + + !DIR$ FORCEINLINE + call I_x1_pol_mult_cosgtos(a_x, d_x, B10, B01, B00, C00, D00, d, n_pt1, n_pt_in) + n_pt_out = n_pt1 + +! print *, ' ' +! print *, a_x, d_x +! print *, real(B10), real(B01), real(B00), real(C00), real(D00) +! print *, n_pt1, real(d(0:n_pt1)) +! print *, ' ' + + if(n_pt1 < 0) then + n_pt_out = -1 + do i = 0, n_pt_in + d(i) = (0.d0, 0.d0) + enddo + return + endif + +end subroutine give_cpolynom_mult_center_x + +! --- + +subroutine I_x1_pol_mult_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + if( (c >= 0) .and. 
(nd >= 0) ) then + + if(a == 1) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a == 2) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else if(a > 2) then + call I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + else ! a == 0 + + if(c == 0)then + nd = 0 + d(0) = (1.d0, 0.d0) + return + endif + + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + endif + + else + + nd = -1 + + endif + +end subroutine I_x1_pol_mult_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, a, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + ASSERT (a > 2) + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + if(a == 3) then + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + elseif(a == 4) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT (a >= 5) + call I_x1_pol_mult_recurs_cosgtos(a-2, c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(a-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + nx = nd + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + if(c > 0) then + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, 
c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + endif + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + endif + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + + ASSERT (a > 2) + + if(a == 3) then + call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + else + ASSERT(a >= 4) + call I_x1_pol_mult_recurs_cosgtos(a-1, c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_recurs_cosgtos + +! --- + +recursive subroutine I_x1_pol_mult_a1_cosgtos(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + if( (c < 0) .or. (nd < 0) ) then + nd = -1 + return + endif + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if(c > 1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = (0.d0, 0.d0) + enddo + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a1_cosgtos + +! 
--- + +recursive subroutine I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in) + + BEGIN_DOC + ! Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n_pt_in, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: nx, ix, iy, ny + complex*16 :: X(0:max_dim) + complex*16 :: Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y + + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + nx = 0 + call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_10, 2, d, nd) + + nx = nd + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + X(ix) = (0.d0, 0.d0) + enddo + + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in) + + if (c>1) then + !DIR$ LOOP COUNT(8) + do ix = 0, nx + X(ix) *= dble(c) + enddo + endif + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_00, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(8) + do ix = 0, n_pt_in + Y(ix) = 0.d0 + enddo + !DIR$ FORCEINLINE + call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, C_00, 2, d, nd) + +end subroutine I_x1_pol_mult_a2_cosgtos + +! --- + +recursive subroutine I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, dim) + + BEGIN_DOC + ! 
Recursive function involved in the two-electron integral + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: dim, c + complex*16, intent(in) :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2) + integer, intent(inout) :: nd + complex*16, intent(inout) :: d(0:max_dim) + + integer :: i + integer :: nx, ix, ny + complex*16 :: X(0:max_dim), Y(0:max_dim) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y + + select case (c) + + case (0) + nd = 0 + d(0) = (1.d0, 0.d0) + return + + case (:-1) + nd = -1 + return + + case (1) + nd = 2 + d(0) = D_00(0) + d(1) = D_00(1) + d(2) = D_00(2) + return + + case (2) + nd = 2 + d(0) = B_01(0) + d(1) = B_01(1) + d(2) = B_01(2) + + ny = 2 + Y(0) = D_00(0) + Y(1) = D_00(1) + Y(2) = D_00(2) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + return + + case default + + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + X(ix) = (0.d0, 0.d0) + enddo + nx = 0 + call I_x2_pol_mult_cosgtos(c-2, B_10, B_01, B_00, C_00, D_00, X, nx, dim) + + !DIR$ LOOP COUNT(6) + do ix = 0, nx + X(ix) *= dble(c-1) + enddo + + !DIR$ FORCEINLINE + call multiply_cpoly(X, nx, B_01, 2, d, nd) + + ny = 0 + !DIR$ LOOP COUNT(6) + do ix = 0, c+c + Y(ix) = 0.d0 + enddo + call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, Y, ny, dim) + + !DIR$ FORCEINLINE + call multiply_cpoly(Y, ny, D_00, 2, d, nd) + + end select + +end subroutine I_x2_pol_mult_cosgtos + +! --- + + diff --git a/src/davidson/EZFIO.cfg b/src/davidson/EZFIO.cfg index bfa55526..1152560f 100644 --- a/src/davidson/EZFIO.cfg +++ b/src/davidson/EZFIO.cfg @@ -1,71 +1,18 @@ -[threshold_davidson] -type: Threshold -doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false. 
-interface: ezfio,provider,ocaml -default: 1.e-10 - -[threshold_nonsym_davidson] -type: Threshold -doc: Thresholds of non-symetric Davidson's algorithm -interface: ezfio,provider,ocaml -default: 1.e-10 - -[threshold_davidson_from_pt2] -type: logical -doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2 -interface: ezfio,provider,ocaml -default: false - -[n_states_diag] -type: States_number -doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag -default: 4 -interface: ezfio,ocaml - -[davidson_sze_max] -type: Strictly_positive_int -doc: Number of micro-iterations before re-contracting -default: 15 -interface: ezfio,provider,ocaml - -[state_following] -type: logical -doc: If |true|, the states are re-ordered to match the input states -default: False -interface: ezfio,provider,ocaml - -[disk_based_davidson] -type: logical -doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is available -default: True -interface: ezfio,provider,ocaml - [csf_based] type: logical doc: If |true|, use the CSF-based algorithm default: False interface: ezfio,provider,ocaml -[distributed_davidson] -type: logical -doc: If |true|, use the distributed algorithm -default: True -interface: ezfio,provider,ocaml - [only_expected_s2] type: logical doc: If |true|, use filter out all vectors with bad |S^2| values default: True interface: ezfio,provider,ocaml -[n_det_max_full] -type: Det_number_max -doc: Maximum number of determinants where |H| is fully diagonalized -interface: ezfio,provider,ocaml -default: 1000 - [without_diagonal] type: logical doc: If |true|, don't use denominator default: False interface: ezfio,provider,ocaml + diff --git a/src/davidson/NEED b/src/davidson/NEED index bfe31bd0..bd0abe2f 100644 --- a/src/davidson/NEED +++ b/src/davidson/NEED @@ -1 +1,2 @@ csf +davidson_keywords diff --git a/src/davidson/davidson_parallel.irp.f 
b/src/davidson/davidson_parallel.irp.f index 399ab11b..f3226307 100644 --- a/src/davidson/davidson_parallel.irp.f +++ b/src/davidson/davidson_parallel.irp.f @@ -548,21 +548,6 @@ end -BEGIN_PROVIDER [ integer, nthreads_davidson ] - implicit none - BEGIN_DOC - ! Number of threads for Davidson - END_DOC - nthreads_davidson = nproc - character*(32) :: env - call getenv('QP_NTHREADS_DAVIDSON',env) - if (trim(env) /= '') then - call lock_io() - read(env,*) nthreads_davidson - call unlock_io() - call write_int(6,nthreads_davidson,'Target number of threads for ') - endif -END_PROVIDER integer function zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id) diff --git a/src/davidson/diagonalization_hs2_dressed.irp.f b/src/davidson/diagonalization_hs2_dressed.irp.f index 346f1cf9..45258c1c 100644 --- a/src/davidson/diagonalization_hs2_dressed.irp.f +++ b/src/davidson/diagonalization_hs2_dressed.irp.f @@ -14,15 +14,6 @@ BEGIN_PROVIDER [ character*(64), diag_algorithm ] endif END_PROVIDER -BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ] - implicit none - BEGIN_DOC - ! 
Threshold of Davidson's algorithm, using PT2 as a guide - END_DOC - threshold_davidson_pt2 = threshold_davidson - -END_PROVIDER - BEGIN_PROVIDER [ integer, dressed_column_idx, (N_states) ] @@ -66,7 +57,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d double precision, allocatable :: H_jj(:) double precision, external :: diag_H_mat_elem, diag_S_mat_elem - integer :: i,k + integer :: i,k,l ASSERT (N_st > 0) ASSERT (sze > 0) ASSERT (Nint > 0) @@ -87,9 +78,14 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d if (dressing_state > 0) then do k=1,N_st + do i=1,sze - H_jj(i) += u_in(i,k) * dressing_column_h(i,k) + H_jj(i) += u_in(i,k) * dressing_column_h(i,k) enddo + + !l = dressed_column_idx(k) + !H_jj(l) += u_in(l,k) * dressing_column_h(l,k) + enddo endif diff --git a/src/davidson/diagonalization_nonsym_h_dressed.irp.f b/src/davidson/diagonalization_nonsym_h_dressed.irp.f new file mode 100644 index 00000000..3ff060a6 --- /dev/null +++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f @@ -0,0 +1,541 @@ + +! --- + +subroutine davidson_diag_nonsym_h(dets_in, u_in, dim_in, energies, sze, N_st, N_st_diag, Nint, dressing_state, converged) + + BEGIN_DOC + ! + ! non-sym Davidson diagonalization. + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! Initial guess vectors are not necessarily orthonormal + ! 
+ END_DOC + + use bitmasks + + implicit none + + integer, intent(in) :: dim_in, sze, N_st, N_st_diag, Nint + integer, intent(in) :: dressing_state + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + logical, intent(out) :: converged + double precision, intent(out) :: energies(N_st_diag) + double precision, intent(inout) :: u_in(dim_in,N_st_diag) + + integer :: i, k, l + double precision :: f + double precision, allocatable :: H_jj(:) + + double precision, external :: diag_H_mat_elem + + ASSERT (N_st > 0) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + PROVIDE mo_two_e_integrals_in_map + + allocate(H_jj(sze)) + + H_jj(1) = diag_H_mat_elem(dets_in(1,1,1), Nint) + !$OMP PARALLEL DEFAULT(NONE) & + !$OMP SHARED(sze, H_jj, dets_in, Nint) & + !$OMP PRIVATE(i) + !$OMP DO SCHEDULE(static) + do i = 2, sze + H_jj(i) = diag_H_mat_elem(dets_in(1,1,i), Nint) + enddo + !$OMP END DO + !$OMP END PARALLEL + + if(dressing_state > 0) then + do k = 1, N_st + do l = 1, N_st + f = overlap_states_inv(k,l) + + !do i = 1, N_det + ! H_jj(i) += f * dressing_delta(i,k) * psi_coef(i,l) + do i = 1, dim_in + H_jj(i) += f * dressing_delta(i,k) * u_in(i,l) + enddo + + enddo + enddo + endif + + call davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag, Nint, dressing_state, converged) + + deallocate(H_jj) + +end subroutine davidson_diag_nonsym_h + +! --- + +subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag_in, Nint, dressing_state, converged) + + BEGIN_DOC + ! + ! non-sym Davidson diagonalization with specific diagonal elements of the H matrix + ! + ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson + ! + ! dets_in : bitmasks corresponding to determinants + ! + ! u_in : guess coefficients on the various states. Overwritten on exit + ! + ! dim_in : leftmost dimension of u_in + ! + ! sze : Number of determinants + ! + ! N_st : Number of eigenstates + ! + ! 
N_st_diag_in : Number of states in which H is diagonalized. Assumed > sze + ! + ! Initial guess vectors are not necessarily orthonormal + ! + END_DOC + + include 'constants.include.F' + + use bitmasks + use mmap_module + + implicit none + + integer, intent(in) :: dim_in, sze, N_st, N_st_diag_in, Nint + integer, intent(in) :: dressing_state + integer(bit_kind), intent(in) :: dets_in(Nint,2,sze) + double precision, intent(in) :: H_jj(sze) + double precision, intent(out) :: energies(N_st_diag_in) + logical, intent(inout) :: converged + double precision, intent(inout) :: u_in(dim_in,N_st_diag_in) + + logical :: disk_based + character*(16384) :: write_buffer + integer :: i, j, k, l, m + integer :: iter, N_st_diag, itertot, shift, shift2, itermax, istate + integer :: nproc_target + integer :: order(N_st_diag_in) + integer :: maxab + double precision :: rss + double precision :: cmax + double precision :: to_print(2,N_st) + double precision :: r1, r2 + double precision :: f + double precision, allocatable :: y(:,:), h(:,:), lambda(:) + double precision, allocatable :: s_tmp(:,:), u_tmp(:,:) + double precision, allocatable :: residual_norm(:) + double precision, allocatable :: U(:,:), overlap(:,:) + double precision, pointer :: W(:,:) + + double precision, external :: u_dot_u + + + N_st_diag = N_st_diag_in + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda + if(N_st_diag*3 > sze) then + print *, 'error in Davidson :' + print *, 'Increase n_det_max_full to ', N_st_diag*3 + stop -1 + endif + + itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1 + itertot = 0 + + if(state_following) then + allocate(overlap(N_st_diag*itermax, N_st_diag*itermax)) + else + allocate(overlap(1,1)) ! 
avoid 'if' for deallocate + endif + overlap = 0.d0 + + PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2 + PROVIDE threshold_nonsym_davidson + + call write_time(6) + write(6,'(A)') '' + write(6,'(A)') 'Davidson Diagonalization' + write(6,'(A)') '------------------------' + write(6,'(A)') '' + + ! Find max number of cores to fit in memory + ! ----------------------------------------- + + nproc_target = nproc + maxab = max(N_det_alpha_unique, N_det_beta_unique) + 1 + + m=1 + disk_based = .False. + call resident_memory(rss) + do + r1 = 8.d0 * &! bytes + ( dble(sze)*(N_st_diag*itermax) &! U + + 1.0d0*dble(sze*m)*(N_st_diag*itermax) &! W + + 3.0d0*(N_st_diag*itermax)**2 &! h,y,s_tmp + + 1.d0*(N_st_diag*itermax) &! lambda + + 1.d0*(N_st_diag) &! residual_norm + ! In H_u_0_nstates_zmq + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on collector + + 2.d0*(N_st_diag*N_det) &! u_t, v_t, on slave + + 0.5d0*maxab &! idx0 in H_u_0_nstates_openmp_work_* + + nproc_target * &! In OMP section + ( 1.d0*(N_int*maxab) &! buffer + + 3.5d0*(maxab) ) &! singles_a, singles_b, doubles, idx + ) / 1024.d0**3 + + if(nproc_target == 0) then + call check_mem(r1, irp_here) + nproc_target = 1 + exit + endif + + if(r1+rss < qp_max_mem) then + exit + endif + + if(itermax > 4) then + itermax = itermax - 1 + else if(m==1 .and. disk_based_davidson) then + m = 0 + disk_based = .True. 
+ itermax = 6 + else + nproc_target = nproc_target - 1 + endif + + enddo + + nthreads_davidson = nproc_target + TOUCH nthreads_davidson + + call write_int(6, N_st, 'Number of states') + call write_int(6, N_st_diag, 'Number of states in diagonalization') + call write_int(6, sze, 'Number of determinants') + call write_int(6, nproc_target, 'Number of threads for diagonalization') + call write_double(6, r1, 'Memory(Gb)') + if(disk_based) then + print *, 'Using swap space to reduce RAM' + endif + + !--------------- + + write(6,'(A)') '' + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6, '(A)') write_buffer(1:6+41*N_st) + write_buffer = 'Iter' + do i = 1, N_st + write_buffer = trim(write_buffer)//' Energy Residual ' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + write_buffer = '=====' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') write_buffer(1:6+41*N_st) + + + if(disk_based) then + ! Create memory-mapped files for W and S + type(c_ptr) :: ptr_w, ptr_s + integer :: fd_s, fd_w + call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),& + 8, fd_w, .False., ptr_w) + call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/)) + else + allocate(W(sze,N_st_diag*itermax)) + endif + + allocate( & + ! Large + U(sze,N_st_diag*itermax), & + ! Small + h(N_st_diag*itermax,N_st_diag*itermax), & + y(N_st_diag*itermax,N_st_diag*itermax), & + s_tmp(N_st_diag*itermax,N_st_diag*itermax), & + residual_norm(N_st_diag), & + lambda(N_st_diag*itermax), & + u_tmp(N_st,N_st_diag)) + + h = 0.d0 + U = 0.d0 + y = 0.d0 + s_tmp = 0.d0 + + + ASSERT (N_st > 0) + ASSERT (N_st_diag >= N_st) + ASSERT (sze > 0) + ASSERT (Nint > 0) + ASSERT (Nint == N_int) + + ! Davidson iterations + ! =================== + + converged = .False. 
+ + do k = N_st+1, N_st_diag + do i = 1, sze + call random_number(r1) + call random_number(r2) + r1 = dsqrt(-2.d0*dlog(r1)) + r2 = dtwo_pi*r2 + u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st) + enddo + u_in(k,k) = u_in(k,k) + 10.d0 + enddo + do k = 1, N_st_diag + call normalize(u_in(1,k), sze) + enddo + + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + + do while (.not.converged) + itertot = itertot + 1 + if(itertot == 8) then + exit + endif + + do iter = 1, itermax-1 + + shift = N_st_diag*(iter-1) + shift2 = N_st_diag*iter + +! if( (iter > 1) .or. (itertot == 1) ) then + + ! Gram-Schmidt to orthogonalize all new guess with the previous vectors + call ortho_qr(U, size(U, 1), sze, shift2) + call ortho_qr(U, size(U, 1), sze, shift2) + + ! Compute |W_k> = \sum_i |i> + ! ----------------------------------- + + if( (sze > 100000) .and. distributed_davidson ) then + call H_u_0_nstates_zmq (W(1,shift+1), U(1,shift+1), N_st_diag, sze) + else + call H_u_0_nstates_openmp(W(1,shift+1), U(1,shift+1), N_st_diag, sze) + endif +! else +! ! Already computed in update below +! continue +! endif + + if(dressing_state > 0) then + + call dgemm( 'T', 'N', N_st, N_st_diag, sze, 1.d0 & + , psi_coef, size(psi_coef, 1), U(1, shift+1), size(U, 1) & + , 0.d0, u_tmp, size(u_tmp, 1)) + + do istate = 1, N_st_diag + do k = 1, N_st + do l = 1, N_st + f = overlap_states_inv(k,l) + do i = 1, sze + W(i,shift+istate) += f * dressing_delta(i,k) * u_tmp(l,istate) + enddo + enddo + enddo + enddo + + endif + + ! Compute h_kl = = + ! ------------------------------------------- + + call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 & + , U, size(U, 1), W, size(W, 1) & + , 0.d0, h, size(h, 1)) + + ! Diagonalize h + ! 
--------------- + call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1)) + + + if (state_following) then + + overlap = -1.d0 + do k = 1, shift2 + do i = 1, shift2 + overlap(k,i) = dabs(y(k,i)) + enddo + enddo + do k = 1, N_st + cmax = -1.d0 + do i = 1, N_st + if(overlap(i,k) > cmax) then + cmax = overlap(i,k) + order(k) = i + endif + enddo + do i = 1, N_st_diag + overlap(order(k),i) = -1.d0 + enddo + enddo + overlap = y + do k = 1, N_st + l = order(k) + if (k /= l) then + y(1:shift2,k) = overlap(1:shift2,l) + endif + enddo + do k = 1, N_st + overlap(k,1) = lambda(k) + enddo + + endif + + ! Express eigenvectors of h in the determinant basis + ! -------------------------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1) & + , 0.d0, U(1,shift2+1), size(U, 1)) + + do k = 1, N_st_diag + call normalize(U(1,shift2+k), sze) + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, W(1,shift2+1), size(W,1)) + + ! Compute residual vector and davidson step + ! ----------------------------------------- + + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k) + do k = 1, N_st_diag + do i = 1, sze + U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2) + enddo + + if(k <= N_st) then + residual_norm(k) = u_dot_u(U(1,shift2+k), sze) + to_print(1,k) = lambda(k) + nuclear_repulsion + to_print(2,k) = residual_norm(k) + endif + enddo + !$OMP END PARALLEL DO + + if((itertot>1).and.(iter == 1)) then + !don't print + continue + else + write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st) + endif + + ! 
Check convergence + if(iter > 1) then + if(threshold_davidson_from_pt2) then + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2 + else + converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson + endif + endif + + do k = 1, N_st + if(residual_norm(k) > 1.d8) then + print *, 'Davidson failed' + stop -1 + endif + enddo + if(converged) then + exit + endif + + logical, external :: qp_stop + if(qp_stop()) then + converged = .True. + exit + endif + + + enddo + + ! Re-contract U and update W + ! -------------------------------- + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , W, size(W, 1), y, size(y, 1) & + , 0.d0, u_in, size(u_in, 1)) + do k = 1, N_st_diag + do i = 1, sze + W(i,k) = u_in(i,k) + enddo + enddo + + call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 & + , U, size(U, 1), y, size(y, 1), 0.d0 & + , u_in, size(u_in, 1)) + + do k = 1, N_st_diag + do i = 1, sze + U(i,k) = u_in(i,k) + enddo + enddo + + enddo + + + call nullify_small_elements(sze, N_st_diag, U, size(U, 1), threshold_davidson_pt2) + do k = 1, N_st_diag + do i = 1, sze + u_in(i,k) = U(i,k) + enddo + enddo + + do k = 1, N_st_diag + energies(k) = lambda(k) + enddo + write_buffer = '======' + do i = 1, N_st + write_buffer = trim(write_buffer)//' ================ ===========' + enddo + write(6,'(A)') trim(write_buffer) + write(6,'(A)') '' + call write_time(6) + + if(disk_based) then + ! Remove temp files + integer, external :: getUnitAndOpen + call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w ) + fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r') + close(fd_w,status='delete') + else + deallocate(W) + endif + + deallocate ( & + residual_norm, & + U, overlap, & + h, y, s_tmp, & + lambda, & + u_tmp & + ) + FREE nthreads_davidson + +end subroutine davidson_diag_nonsym_hjj + +! 
--- + + + + + + + diff --git a/src/davidson/overlap_states.irp.f b/src/davidson/overlap_states.irp.f new file mode 100644 index 00000000..797d1210 --- /dev/null +++ b/src/davidson/overlap_states.irp.f @@ -0,0 +1,40 @@ + +! --- + + BEGIN_PROVIDER [ double precision, overlap_states, (N_states,N_states) ] +&BEGIN_PROVIDER [ double precision, overlap_states_inv, (N_states,N_states) ] + + BEGIN_DOC + ! + ! S_kl = ck.T x cl + ! = psi_coef(:,k).T x psi_coef(:,l) + ! + END_DOC + + implicit none + integer :: i + double precision :: o_tmp + + if(N_states == 1) then + + o_tmp = 0.d0 + do i = 1, N_det + o_tmp = o_tmp + psi_coef(i,1) * psi_coef(i,1) + enddo + overlap_states (1,1) = o_tmp + overlap_states_inv(1,1) = 1.d0 / o_tmp + + else + + call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & + , psi_coef, size(psi_coef, 1), psi_coef, size(psi_coef, 1) & + , 0.d0, overlap_states, size(overlap_states, 1) ) + + call get_inverse(overlap_states, N_states, N_states, overlap_states_inv, N_states) + + endif + +END_PROVIDER + +! --- + diff --git a/src/davidson_dressed/nonsym_diagonalize_ci.irp.f b/src/davidson_dressed/nonsym_diagonalize_ci.irp.f new file mode 100644 index 00000000..fa4b8b33 --- /dev/null +++ b/src/davidson_dressed/nonsym_diagonalize_ci.irp.f @@ -0,0 +1,188 @@ + +! --- + +BEGIN_PROVIDER [ double precision, CI_energy_nonsym_dressed, (N_states_diag) ] + + BEGIN_DOC + ! N_states lowest eigenvalues of the CI matrix + END_DOC + + implicit none + integer :: j + character*(8) :: st + + call write_time(6) + do j = 1, min(N_det, N_states_diag) + CI_energy_nonsym_dressed(j) = CI_electronic_energy_nonsym_dressed(j) + nuclear_repulsion + enddo + + do j = 1, min(N_det, N_states) + write(st, '(I4)') j + call write_double(6, CI_energy_nonsym_dressed(j), 'Energy of state '//trim(st)) + enddo + +END_PROVIDER + +! 
--- + + BEGIN_PROVIDER [ double precision, CI_electronic_energy_nonsym_dressed, (N_states_diag) ] +&BEGIN_PROVIDER [ double precision, CI_eigenvectors_nonsym_dressed, (N_det,N_states_diag) ] + + BEGIN_DOC + ! Eigenvectors/values of the CI matrix + END_DOC + + implicit none + logical :: converged + integer :: i, j, k + integer :: i_other_state + integer :: i_state + logical, allocatable :: good_state_array(:) + integer, allocatable :: index_good_state_array(:) + double precision, allocatable :: eigenvectors(:,:), eigenvalues(:) + + PROVIDE threshold_nonsym_davidson nthreads_davidson + + ! Guess values for the "N_states" states of the CI_eigenvectors_nonsym_dressed + do j = 1, min(N_states, N_det) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = psi_coef(i,j) + enddo + enddo + + do j = min(N_states, N_det)+1, N_states_diag + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = 0.d0 + enddo + enddo + + ! --- + + if(diag_algorithm == "Davidson") then + + ASSERT(n_states_diag .lt. n_states) + + do j = 1, min(N_states, N_det) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = psi_coef(i,j) + enddo + enddo + + converged = .False. + call davidson_diag_nonsym_h( psi_det, CI_eigenvectors_nonsym_dressed & + , size(CI_eigenvectors_nonsym_dressed, 1) & + , CI_electronic_energy_nonsym_dressed & + , N_det, min(N_det, N_states), min(N_det, N_states_diag), N_int, 1, converged ) + + else if(diag_algorithm == "Lapack") then + + allocate(eigenvectors(size(H_matrix_nonsym_dressed, 1),N_det)) + allocate(eigenvalues(N_det)) + + call diag_nonsym_right( N_det, H_matrix_nonsym_dressed, size(H_matrix_nonsym_dressed, 1) & + , eigenvectors, size(eigenvectors, 1), eigenvalues, size(eigenvalues, 1) ) + + CI_electronic_energy_nonsym_dressed(:) = 0.d0 + + ! 
Select the "N_states_diag" states of lowest energy + do j = 1, min(N_det, N_states_diag) + do i = 1, N_det + CI_eigenvectors_nonsym_dressed(i,j) = eigenvectors(i,j) + enddo + CI_electronic_energy_nonsym_dressed(j) = eigenvalues(j) + enddo + + deallocate(eigenvectors, eigenvalues) + + ! --- --- + + endif + + ! --- + +END_PROVIDER + +! --- + +subroutine diagonalize_CI_nonsym_dressed() + + BEGIN_DOC + ! Replace the coefficients of the CI states by the coefficients of the + ! eigenstates of the CI matrix + END_DOC + + implicit none + integer :: i, j + + PROVIDE dressing_delta + + do j = 1, N_states + do i = 1, N_det + psi_coef(i,j) = CI_eigenvectors_nonsym_dressed(i,j) + enddo + enddo + + SOFT_TOUCH psi_coef + +end subroutine diagonalize_CI_nonsym_dressed + +! --- + +BEGIN_PROVIDER [ double precision, H_matrix_nonsym_dressed, (N_det,N_det) ] + + BEGIN_DOC + ! Dressed H with Delta_ij + END_DOC + + implicit none + integer :: i, j, l, k + double precision :: f + + H_matrix_nonsym_dressed(1:N_det,1:N_det) = h_matrix_all_dets(1:N_det,1:N_det) + + if(N_states == 1) then + +! !symmetric formula +! l = dressed_column_idx(1) +! f = 1.0d0/psi_coef(l,1) +! do i=1,N_det +! h_matrix_nonsym_dressed(i,l) += dressing_column_h(i,1) *f +! h_matrix_nonsym_dressed(l,i) += dressing_column_h(i,1) *f +! enddo + +! l = dressed_column_idx(1) +! f = 1.0d0 / psi_coef(l,1) +! do j = 1, N_det +! H_matrix_nonsym_dressed(j,l) += f * dressing_delta(j,1) +! enddo + + k = 1 + l = 1 + f = overlap_states_inv(k,l) + do j = 1, N_det + do i = 1, N_det + H_matrix_nonsym_dressed(i,j) = H_matrix_nonsym_dressed(i,j) + f * dressing_delta(i,k) * psi_coef(j,l) + enddo + enddo + + else + + do k = 1, N_states + do l = 1, N_states + f = overlap_states_inv(k,l) + + do j = 1, N_det + do i = 1, N_det + H_matrix_nonsym_dressed(i,j) = H_matrix_nonsym_dressed(i,j) + f * dressing_delta(i,k) * psi_coef(j,l) + enddo + enddo + + enddo + enddo + + endif + +END_PROVIDER + +! 
---
+
diff --git a/src/davidson_keywords/EZFIO.cfg b/src/davidson_keywords/EZFIO.cfg
new file mode 100644
index 00000000..6337b96f
--- /dev/null
+++ b/src/davidson_keywords/EZFIO.cfg
@@ -0,0 +1,54 @@
+[threshold_davidson]
+type: Threshold
+doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[threshold_nonsym_davidson]
+type: Threshold
+doc: Thresholds of non-symmetric Davidson's algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[davidson_sze_max]
+type: Strictly_positive_int
+doc: Number of micro-iterations before re-contracting
+default: 15
+interface: ezfio,provider,ocaml
+
+[state_following]
+type: logical
+doc: If |true|, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[disk_based_davidson]
+type: logical
+doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is available
+default: True
+interface: ezfio,provider,ocaml
+
+[n_states_diag]
+type: States_number
+doc: Controls the number of states to consider during the Davidson diagonalization. 
The number of states is n_states * n_states_diag +default: 4 +interface: ezfio,ocaml + +[n_det_max_full] +type: Det_number_max +doc: Maximum number of determinants where |H| is fully diagonalized +interface: ezfio,provider,ocaml +default: 1000 + +[threshold_davidson_from_pt2] +type: logical +doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2 +interface: ezfio,provider,ocaml +default: false + +[distributed_davidson] +type: logical +doc: If |true|, use the distributed algorithm +default: True +interface: ezfio,provider,ocaml + diff --git a/src/davidson_keywords/NEED b/src/davidson_keywords/NEED new file mode 100644 index 00000000..5a3182ed --- /dev/null +++ b/src/davidson_keywords/NEED @@ -0,0 +1 @@ +ezfio_files diff --git a/src/davidson_keywords/README.rst b/src/davidson_keywords/README.rst new file mode 100644 index 00000000..9567cdb1 --- /dev/null +++ b/src/davidson_keywords/README.rst @@ -0,0 +1,5 @@ +================= +davidson_keywords +================= + +Keywords used for Davidson algorithms. diff --git a/src/davidson/input.irp.f b/src/davidson_keywords/input.irp.f similarity index 77% rename from src/davidson/input.irp.f rename to src/davidson_keywords/input.irp.f index b37c87d0..d1d6124f 100644 --- a/src/davidson/input.irp.f +++ b/src/davidson_keywords/input.irp.f @@ -1,3 +1,6 @@ + +! 
--- + BEGIN_PROVIDER [ integer, n_states_diag ] implicit none BEGIN_DOC @@ -8,11 +11,11 @@ BEGIN_PROVIDER [ integer, n_states_diag ] PROVIDE ezfio_filename if (mpi_master) then - call ezfio_has_davidson_n_states_diag(has) + call ezfio_has_davidson_keywords_n_states_diag(has) if (has) then - call ezfio_get_davidson_n_states_diag(n_states_diag) + call ezfio_get_davidson_keywords_n_states_diag(n_states_diag) else - print *, 'davidson/n_states_diag not found in EZFIO file' + print *, 'davidson_keywords/n_states_diag not found in EZFIO file' stop 1 endif n_states_diag = max(2,N_states * N_states_diag) @@ -32,3 +35,4 @@ BEGIN_PROVIDER [ integer, n_states_diag ] END_PROVIDER +! --- diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f new file mode 100644 index 00000000..fed2ba9b --- /dev/null +++ b/src/davidson_keywords/usef.irp.f @@ -0,0 +1,33 @@ +use bitmasks +use f77_zmq + + +! --- + +BEGIN_PROVIDER [ integer, nthreads_davidson ] + implicit none + BEGIN_DOC + ! Number of threads for Davidson + END_DOC + nthreads_davidson = nproc + character*(32) :: env + call getenv('QP_NTHREADS_DAVIDSON',env) + if (trim(env) /= '') then + read(env,*) nthreads_davidson + call write_int(6,nthreads_davidson,'Target number of threads for ') + endif +END_PROVIDER + +! --- + +BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ] + implicit none + BEGIN_DOC + ! Threshold of Davidson's algorithm, using PT2 as a guide + END_DOC + threshold_davidson_pt2 = threshold_davidson + +END_PROVIDER + +! 
--- + diff --git a/src/davidson_undressed/null_dressing_vector.irp.f b/src/davidson_undressed/null_dressing_vector.irp.f index faffe964..1989bb6d 100644 --- a/src/davidson_undressed/null_dressing_vector.irp.f +++ b/src/davidson_undressed/null_dressing_vector.irp.f @@ -1,10 +1,12 @@ BEGIN_PROVIDER [ double precision, dressing_column_h, (N_det,N_states) ] &BEGIN_PROVIDER [ double precision, dressing_column_s, (N_det,N_states) ] +&BEGIN_PROVIDER [ double precision, dressing_delta , (N_det,N_states) ] implicit none BEGIN_DOC ! Null dressing vectors END_DOC dressing_column_h(:,:) = 0.d0 dressing_column_s(:,:) = 0.d0 + dressing_delta (:,:) = 0.d0 END_PROVIDER diff --git a/src/determinants/spindeterminants.ezfio_config b/src/determinants/spindeterminants.ezfio_config index 39ccb82b..4fe1333a 100644 --- a/src/determinants/spindeterminants.ezfio_config +++ b/src/determinants/spindeterminants.ezfio_config @@ -9,8 +9,11 @@ spindeterminants psi_det_beta integer*8 (spindeterminants_n_int*spindeterminants_bit_kind/8,spindeterminants_n_det_beta) psi_coef_matrix_rows integer (spindeterminants_n_det) psi_coef_matrix_columns integer (spindeterminants_n_det) - psi_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) + psi_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) + psi_left_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states) n_svd_coefs integer + n_svd_alpha integer + n_svd_beta integer psi_svd_alpha double precision (spindeterminants_n_det_alpha,spindeterminants_n_svd_coefs,spindeterminants_n_states) psi_svd_beta double precision (spindeterminants_n_det_beta,spindeterminants_n_svd_coefs,spindeterminants_n_states) psi_svd_coefs double precision (spindeterminants_n_svd_coefs,spindeterminants_n_states) diff --git a/src/dft_utils_in_r/dm_in_r_routines.irp.f b/src/dft_utils_in_r/dm_in_r_routines.irp.f index 9991289c..364b6767 100644 --- 
a/src/dft_utils_in_r/dm_in_r_routines.irp.f +++ b/src/dft_utils_in_r/dm_in_r_routines.irp.f @@ -140,6 +140,8 @@ end enddo enddo + ! TODO : build the vector of chi_i(r) chi_j(r) and conscequently grad_i(r) grad_j(r) + ! : the same for gamma_ij and big dot product do istate = 1, N_states ! alpha density ! aos_array_bis = \rho_ao * aos_array diff --git a/src/fci_tc_bi/diagonalize_ci.irp.f b/src/fci_tc_bi/diagonalize_ci.irp.f index c8369e93..b6ec073f 100644 --- a/src/fci_tc_bi/diagonalize_ci.irp.f +++ b/src/fci_tc_bi/diagonalize_ci.irp.f @@ -39,6 +39,9 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) write(*,'(A28,X,I10,X,100(F16.8,X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc + pt2_tmp/norm,E_tc + rpt2_tmp/norm,abs_pt2 print*,'*****' endif + psi_energy(1:N_states) = eigval_right_tc_bi_orth(1:N_states) + psi_s2(1:N_states) = s2_eigvec_tc_bi_orth(1:N_states) + E_tc = eigval_right_tc_bi_orth(1) norm = norm_ground_left_right_bi_orth ndet = N_det @@ -50,7 +53,7 @@ subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2) enddo enddo SOFT_TOUCH eigval_left_tc_bi_orth eigval_right_tc_bi_orth leigvec_tc_bi_orth reigvec_tc_bi_orth norm_ground_left_right_bi_orth - SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef + SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho psi_coef psi_energy psi_s2 call save_tc_bi_ortho_wavefunction end diff --git a/src/fci_tc_bi/pt2_tc.irp.f b/src/fci_tc_bi/pt2_tc.irp.f new file mode 100644 index 00000000..96a54825 --- /dev/null +++ b/src/fci_tc_bi/pt2_tc.irp.f @@ -0,0 +1,31 @@ +program tc_pt2_prog + implicit none + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + pruning = -1.d0 + touch pruning +! pt2_relative_error = 0.01d0 +! 
touch pt2_relative_error + call run_pt2_tc + +end + + +subroutine run_pt2_tc + + implicit none + + PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e + if(elec_alpha_num+elec_beta_num.ge.3)then + if(three_body_h_tc)then + call provide_all_three_ints_bi_ortho + endif + endif + ! --- + + call tc_pt2 + + +end diff --git a/src/kohn_sham/print_mos.irp.f b/src/kohn_sham/print_mos.irp.f index 7105c989..19bb98bc 100644 --- a/src/kohn_sham/print_mos.irp.f +++ b/src/kohn_sham/print_mos.irp.f @@ -3,7 +3,7 @@ program print_mos integer :: i,nx double precision :: r(3), xmax, dx, accu double precision, allocatable :: mos_array(:) - double precision:: alpha,envelop + double precision:: alpha,envelop,dm_a,dm_b allocate(mos_array(mo_num)) xmax = 5.d0 nx = 1000 @@ -11,11 +11,12 @@ program print_mos r = 0.d0 alpha = 0.5d0 do i = 1, nx + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) call give_all_mos_at_r(r,mos_array) accu = mos_array(3)**2+mos_array(4)**2+mos_array(5)**2 accu = dsqrt(accu) envelop = (1.d0 - dexp(-alpha * r(3)**2)) - write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, envelop + write(33,'(100(F16.10,X))')r(3), mos_array(1), mos_array(2), accu, dm_a+dm_b, envelop r(3) += dx enddo diff --git a/src/mo_basis/EZFIO.cfg b/src/mo_basis/EZFIO.cfg index 81ffba5c..4c4f1eca 100644 --- a/src/mo_basis/EZFIO.cfg +++ b/src/mo_basis/EZFIO.cfg @@ -9,6 +9,12 @@ doc: Coefficient of the i-th |AO| on the j-th |MO| interface: ezfio size: (ao_basis.ao_num,mo_basis.mo_num) +[mo_coef_aux] +type: double precision +doc: AUX Coefficient of the i-th |AO| on the j-th |MO| +interface: ezfio +size: (ao_basis.ao_num,mo_basis.mo_num) + [mo_coef_imag] type: double precision doc: Imaginary part of the MO coefficient of the i-th |AO| on the j-th |MO| diff --git a/src/mo_basis/mos_aux.irp.f b/src/mo_basis/mos_aux.irp.f new file mode 100644 index 00000000..27a874b1 --- /dev/null +++ b/src/mo_basis/mos_aux.irp.f @@ -0,0 +1,53 @@ + +! 
--- + +BEGIN_PROVIDER [double precision, mo_coef_aux, (ao_num,mo_num)] + + implicit none + integer :: i, j + logical :: exists + double precision, allocatable :: buffer(:,:) + + PROVIDE ezfio_filename + + if (mpi_master) then + ! Coefs + call ezfio_has_mo_basis_mo_coef_aux(exists) + endif + + IRP_IF MPI_DEBUG + print *, irp_here, mpi_rank + call MPI_BARRIER(MPI_COMM_WORLD, ierr) + IRP_ENDIF + + IRP_IF MPI + include 'mpif.h' + integer :: ierr + call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read mo_coef_aux with MPI' + endif + IRP_ENDIF + + if (exists) then + if (mpi_master) then + call ezfio_get_mo_basis_mo_coef_aux(mo_coef_aux) + write(*,*) 'Read mo_coef_aux' + endif + IRP_IF MPI + call MPI_BCAST(mo_coef_aux, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr) + if (ierr /= MPI_SUCCESS) then + stop 'Unable to read mo_coef_aux with MPI' + endif + IRP_ENDIF + else + ! Orthonormalized AO basis + do i = 1, mo_num + do j = 1, ao_num + mo_coef_aux(j,i) = ao_ortho_canonical_coef(j,i) + enddo + enddo + endif + +END_PROVIDER + diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f index af441335..146028d5 100644 --- a/src/non_h_ints_mu/debug_fit.irp.f +++ b/src/non_h_ints_mu/debug_fit.irp.f @@ -18,13 +18,13 @@ program debug_fit PROVIDE mu_erf j1b_pen !call test_j1b_nucl() - call test_grad_j1b_nucl() + !call test_grad_j1b_nucl() !call test_lapl_j1b_nucl() !call test_list_b2() - !call test_list_b3() + call test_list_b3() - call test_fit_u() + !call test_fit_u() !call test_fit_u2() !call test_fit_ugradu() @@ -82,9 +82,9 @@ subroutine test_grad_j1b_nucl() integer :: ipoint double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz double precision :: r(3) - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double 
precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num print*, ' test_grad_j1b_nucl ...' @@ -101,7 +101,7 @@ subroutine test_grad_j1b_nucl() r(3) = final_grid_points(3,ipoint) i_exc = v_1b_grad(1,ipoint) - i_num = grad_x_j1b_nucl(r) + i_num = grad_x_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in x of v_1b_grad on', ipoint @@ -111,7 +111,7 @@ subroutine test_grad_j1b_nucl() endif i_exc = v_1b_grad(2,ipoint) - i_num = grad_y_j1b_nucl(r) + i_num = grad_y_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in y of v_1b_grad on', ipoint @@ -121,7 +121,7 @@ subroutine test_grad_j1b_nucl() endif i_exc = v_1b_grad(3,ipoint) - i_num = grad_z_j1b_nucl(r) + i_num = grad_z_j1b_nucl_num(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. eps_ij) then print *, ' problem in z of v_1b_grad on', ipoint @@ -236,16 +236,25 @@ subroutine test_list_b3() integer :: ipoint double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_tmp, i_num, normalz double precision :: r(3) - double precision, external :: j1b_nucl + double precision :: grad_num(3), eps_der, eps_lap, tmp_der, tmp_lap, i0, ip, im + double precision, external :: j1b_nucl_square print*, ' test_list_b3 ...' + eps_ij = 1d-7 + + eps_der = 1d-5 + tmp_der = 0.5d0 / eps_der + + eps_lap = 1d-4 + tmp_lap = 1.d0 / (eps_lap*eps_lap) + + ! --- + PROVIDE v_1b_list_b3 - eps_ij = 1d-7 acc_tot = 0.d0 normalz = 0.d0 - do ipoint = 1, n_points_final_grid r(1) = final_grid_points(1,ipoint) @@ -253,11 +262,12 @@ subroutine test_list_b3() r(3) = final_grid_points(3,ipoint) i_exc = v_1b_list_b3(ipoint) - i_tmp = j1b_nucl(r) - i_num = i_tmp * i_tmp + i_num = j1b_nucl_square(r) acc_ij = dabs(i_exc - i_num) if(acc_ij .gt. 
eps_ij) then print *, ' problem in list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) print *, ' analyt = ', i_exc print *, ' numeri = ', i_num print *, ' diff = ', acc_ij @@ -267,8 +277,136 @@ subroutine test_list_b3() normalz += dabs(i_num) enddo - print*, ' acc_tot = ', acc_tot - print*, ' normalz = ', normalz + print*, ' acc_tot on val = ', acc_tot + print*, ' normalz on val = ', normalz + + ! --- + + PROVIDE v_1b_square_grad + + acc_tot = 0.d0 + normalz = 0.d0 + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + + i_exc = v_1b_square_grad(ipoint,1) + r(1) = r(1) + eps_der + ip = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(1) = r(1) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad_x list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + i_exc = v_1b_square_grad(ipoint,2) + r(2) = r(2) + eps_der + ip = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(2) = r(2) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in grad_y list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + + i_exc = v_1b_square_grad(ipoint,3) + r(3) = r(3) + eps_der + ip = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps_der + im = j1b_nucl_square(r) + r(3) = r(3) + eps_der + i_num = tmp_der * (ip - im) + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. 
eps_ij) then + print *, ' problem in grad_z list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + + print*, ' acc_tot on grad = ', acc_tot + print*, ' normalz on grad = ', normalz + + ! --- + + PROVIDE v_1b_square_lapl + + acc_tot = 0.d0 + normalz = 0.d0 + do ipoint = 1, n_points_final_grid + + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) + i0 = j1b_nucl_square(r) + + i_exc = v_1b_square_lapl(ipoint) + + r(1) = r(1) + eps_lap + ip = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(1) = r(1) + eps_lap + i_num = tmp_lap * (ip - 2.d0 * i0 + im) + + r(2) = r(2) + eps_lap + ip = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(2) = r(2) + eps_lap + i_num = i_num + tmp_lap * (ip - 2.d0 * i0 + im) + + r(3) = r(3) + eps_lap + ip = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps_lap + im = j1b_nucl_square(r) + r(3) = r(3) + eps_lap + i_num = i_num + tmp_lap * (ip - 2.d0 * i0 + im) + + acc_ij = dabs(i_exc - i_num) + if(acc_ij .gt. eps_ij) then + print *, ' problem in lapl list_b3 on', ipoint + print *, ' r = ', r + print *, ' r2 = ', r(1)*r(1) + r(2)*r(2) + r(3)*r(3) + print *, ' analyt = ', i_exc + print *, ' numeri = ', i_num + print *, ' diff = ', acc_ij + endif + + acc_tot += acc_ij + normalz += dabs(i_num) + enddo + + print*, ' acc_tot on lapl = ', acc_tot + print*, ' normalz on lapl = ', normalz + + ! --- return end subroutine test_list_b3 @@ -317,7 +455,7 @@ subroutine test_fit_ugradu() i_fit = i_fit / dsqrt(x2) tmp = j12_mu(r1, r2) - call grad1_j12_mu_exc(r1, r2, grad) + call grad1_j12_mu(r1, r2, grad) ! 
--- diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f index 1fd39f6a..3f1a9bf5 100644 --- a/src/non_h_ints_mu/grad_squared.irp.f +++ b/src/non_h_ints_mu/grad_squared.irp.f @@ -17,7 +17,7 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi ! ! if J(r1,r2) = u12 x v1 x v2 ! - ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)]) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] + ! gradu_squared_u_ij_mu = -0.50 x \int r2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12^2)) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ] ! = -0.25 x v1^2 \int r2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2 ! + -0.50 x (grad_1 v1)^2 \int r2 \phi_i(2) \phi_j(2) u12^2 v2^2 ! + -1.00 x v1 (grad_1 v1) \int r2 \phi_i(2) \phi_j(2) (grad_1 u12) v2^2 @@ -232,37 +232,33 @@ BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_g PROVIDE j1b_type - if(j1b_type .eq. 3) then - - do ipoint = 1, n_points_final_grid - tmp1 = v_1b(ipoint) - tmp1 = tmp1 * tmp1 - do j = 1, ao_num - do i = 1, ao_num - grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) - enddo + do ipoint = 1, n_points_final_grid + tmp1 = v_1b(ipoint) + tmp1 = tmp1 * tmp1 + do j = 1, ao_num + do i = 1, ao_num + grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint) enddo enddo + enddo - else - - grad12_j12 = 0.d0 - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points(1,ipoint) - r(2) = final_grid_points(2,ipoint) - r(3) = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - do igauss = 1, n_max_fit_slat - delta = expo_gauss_1_erf_x_2(igauss) - coef = coef_gauss_1_erf_x_2(igauss) - grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) - enddo - enddo - enddo - enddo - - endif + !if(j1b_type .eq. 0) then + ! grad12_j12 = 0.d0 + ! 
do ipoint = 1, n_points_final_grid + ! r(1) = final_grid_points(1,ipoint) + ! r(2) = final_grid_points(2,ipoint) + ! r(3) = final_grid_points(3,ipoint) + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do igauss = 1, n_max_fit_slat + ! delta = expo_gauss_1_erf_x_2(igauss) + ! coef = coef_gauss_1_erf_x_2(igauss) + ! grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j) + ! enddo + ! enddo + ! enddo + ! enddo + !endif call wall_time(time1) print*, ' Wall time for grad12_j12 = ', time1 - time0 @@ -271,7 +267,7 @@ END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid) ] +BEGIN_PROVIDER [double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid)] implicit none integer :: ipoint, i, j @@ -351,17 +347,19 @@ END_PROVIDER ! --- + BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)] BEGIN_DOC ! - ! tc_grad_square_ao(k,i,l,j) = 1/2 + ! tc_grad_square_ao(k,i,l,j) = -1/2 ! END_DOC implicit none integer :: ipoint, i, j, k, l - double precision :: weight1, ao_ik_r, ao_i_r + double precision :: weight1, ao_k_r, ao_i_r + double precision :: der_envsq_x, der_envsq_y, der_envsq_z, lap_envsq double precision :: time0, time1 double precision, allocatable :: b_mat(:,:,:), tmp(:,:,:) @@ -376,14 +374,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao else - allocate(b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid)) + ! 
--- + + PROVIDE int2_grad1_u12_square_ao + + allocate(b_mat(n_points_final_grid,ao_num,ao_num)) b_mat = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (i, k, ipoint) & - !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) - !$OMP DO SCHEDULE (static) + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) do i = 1, ao_num do k = 1, ao_num do ipoint = 1, n_points_final_grid @@ -391,30 +393,60 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao enddo enddo enddo - !$OMP END DO - !$OMP END PARALLEL - - tmp = 0.d0 - !$OMP PARALLEL & - !$OMP DEFAULT (NONE) & - !$OMP PRIVATE (j, l, ipoint) & - !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12) - !$OMP DO SCHEDULE (static) - do ipoint = 1, n_points_final_grid - do j = 1, ao_num - do l = 1, ao_num - tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint) - enddo - enddo - enddo - !$OMP END DO - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL tc_grad_square_ao = 0.d0 - call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & - , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & - , 1.d0, tc_grad_square_ao, ao_num*ao_num) - deallocate(tmp, b_mat) + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 0.d0, tc_grad_square_ao, ao_num*ao_num) + + ! --- + + if(((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) .and. use_ipp) then + + print*, " going through Manu's IPP" + + ! an additional term is added here directly instead of + ! being added in int2_grad1_u12_square_ao for performance + ! 
note that the factor + + PROVIDE int2_u2_j1b2 + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector, & + !$OMP v_1b_square_grad, v_1b_square_lapl, aos_grad_in_r_array_transp_bis) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) + + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i) = weight1 * ( ao_k_r * ao_i_r * v_1b_square_lapl(ipoint) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 1.d0, tc_grad_square_ao, ao_num*ao_num) + endif + + ! --- + + deallocate(b_mat) call sum_A_At(tc_grad_square_ao(1,1,1,1), ao_num*ao_num) @@ -450,3 +482,4 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao END_PROVIDER ! 
--- + diff --git a/src/non_h_ints_mu/grad_tc_int.irp.f b/src/non_h_ints_mu/grad_tc_int.irp.f index cb3b71a3..f4eb02e2 100644 --- a/src/non_h_ints_mu/grad_tc_int.irp.f +++ b/src/non_h_ints_mu/grad_tc_int.irp.f @@ -16,9 +16,11 @@ BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, a double precision :: wall1, wall0 double precision, allocatable :: b_mat(:,:,:,:), ac_mat(:,:,:,:) + print*, ' providing ao_non_hermit_term_chemist ...' + call wall_time(wall0) + provide v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu - call wall_time(wall0) allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num)) !$OMP PARALLEL & @@ -102,7 +104,7 @@ BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, a !$OMP END PARALLEL call wall_time(wall1) - print *, ' wall time dgemm ', wall1 - wall0 + print *, ' wall time for ao_non_hermit_term_chemist ', wall1 - wall0 END_PROVIDER diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f index a515e0b8..9b91a8ed 100644 --- a/src/non_h_ints_mu/j12_nucl_utils.irp.f +++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f @@ -8,79 +8,160 @@ BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)] double precision :: x, y, z, dx, dy, dz double precision :: a, d, e, fact_r - do ipoint = 1, n_points_final_grid + if(j1b_type .eq. 3) then - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + ! 
v(r) = \Pi_{a} [1 - \exp(-\alpha_a (r - r_a)^2)] - fact_r = 1.d0 - do j = 1, nucl_num - a = j1b_pen(j) - dx = x - nucl_coord(j,1) - dy = y - nucl_coord(j,2) - dz = z - nucl_coord(j,3) - d = dx*dx + dy*dy + dz*dz - e = 1.d0 - dexp(-a*d) + do ipoint = 1, n_points_final_grid - fact_r = fact_r * e + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_r = 1.d0 + do j = 1, nucl_num + a = j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + d = dx*dx + dy*dy + dz*dz + e = 1.d0 - dexp(-a*d) + + fact_r = fact_r * e + enddo + + v_1b(ipoint) = fact_r enddo - v_1b(ipoint) = fact_r - enddo + elseif(j1b_type .eq. 4) then + + ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_r = 1.d0 + do j = 1, nucl_num + a = j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + d = dx*dx + dy*dy + dz*dz + + fact_r = fact_r - dexp(-a*d) + enddo + + v_1b(ipoint) = fact_r + enddo + + else + + print*, 'j1b_type = ', j1b_type, 'is not implemented for v_1b' + stop + + endif END_PROVIDER ! --- -BEGIN_PROVIDER [ double precision, v_1b_grad, (3, n_points_final_grid)] +BEGIN_PROVIDER [double precision, v_1b_grad, (3, n_points_final_grid)] implicit none integer :: ipoint, i, j, phase - double precision :: x, y, z, dx, dy, dz + double precision :: x, y, z, dx, dy, dz, r2 double precision :: a, d, e double precision :: fact_x, fact_y, fact_z double precision :: ax_der, ay_der, az_der, a_expo - do ipoint = 1, n_points_final_grid + PROVIDE j1b_type - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) + if(j1b_type .eq. 3) then - fact_x = 0.d0 - fact_y = 0.d0 - fact_z = 0.d0 - do i = 1, List_all_comb_b2_size + ! 
v(r) = \Pi_{a} [1 - \exp(-\alpha_a (r - r_a)^2)] - phase = 0 - a_expo = 0.d0 - ax_der = 0.d0 - ay_der = 0.d0 - az_der = 0.d0 + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + v_1b_grad(1,ipoint) = fact_x + v_1b_grad(2,ipoint) = fact_y + v_1b_grad(3,ipoint) = fact_z + enddo + + elseif(j1b_type .eq. 4) then + + ! v(r) = 1 - \sum_{a} \exp(-\alpha_a (r - r_a)^2) + + do ipoint = 1, n_points_final_grid + + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 do j = 1, nucl_num - a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) dy = y - nucl_coord(j,2) dz = z - nucl_coord(j,3) - - phase += List_all_comb_b2(j,i) - a_expo += a * (dx*dx + dy*dy + dz*dz) - ax_der += a * dx - ay_der += a * dy - az_der += a * dz - enddo - e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + r2 = dx*dx + dy*dy + dz*dz - fact_x += e * ax_der - fact_y += e * ay_der - fact_z += e * az_der + a = j1b_pen(j) + e = a * dexp(-a * r2) + + ax_der += e * dx + ay_der += e * dy + az_der += e * dz + enddo + + v_1b_grad(1,ipoint) = 2.d0 * ax_der + v_1b_grad(2,ipoint) = 2.d0 * ay_der + v_1b_grad(3,ipoint) = 2.d0 * az_der enddo - v_1b_grad(1,ipoint) = fact_x - v_1b_grad(2,ipoint) = fact_y - v_1b_grad(3,ipoint) = fact_z - enddo + else 
+ + print*, 'j1b_type = ', j1b_type, 'is not implemented' + stop + + endif END_PROVIDER @@ -91,7 +172,7 @@ BEGIN_PROVIDER [ double precision, v_1b_lapl, (n_points_final_grid)] implicit none integer :: ipoint, i, j, phase double precision :: x, y, z, dx, dy, dz - double precision :: a, d, e, b + double precision :: a, e, b double precision :: fact_r double precision :: ax_der, ay_der, az_der, a_expo @@ -204,36 +285,53 @@ END_PROVIDER ! --- -double precision function jmu_modif(r1, r2) + BEGIN_PROVIDER [double precision, v_1b_square_grad, (n_points_final_grid,3)] +&BEGIN_PROVIDER [double precision, v_1b_square_lapl, (n_points_final_grid) ] implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, external :: j12_mu, j12_nucl + integer :: ipoint, i + double precision :: x, y, z, dx, dy, dz, r2 + double precision :: coef, expo, a_expo, tmp + double precision :: fact_x, fact_y, fact_z, fact_r - jmu_modif = j12_mu(r1, r2) * j12_nucl(r1, r2) + PROVIDE List_all_comb_b3_coef List_all_comb_b3_expo List_all_comb_b3_cent - return -end function jmu_modif + do ipoint = 1, n_points_final_grid -! 
--- + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) -double precision function j12_mu(r1, r2) + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + fact_r = 0.d0 + do i = 1, List_all_comb_b3_size - include 'constants.include.F' + coef = List_all_comb_b3_coef(i) + expo = List_all_comb_b3_expo(i) - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision :: mu_r12, r12 + dx = x - List_all_comb_b3_cent(1,i) + dy = y - List_all_comb_b3_cent(2,i) + dz = z - List_all_comb_b3_cent(3,i) + r2 = dx * dx + dy * dy + dz * dz - r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & - + (r1(2) - r2(2)) * (r1(2) - r2(2)) & - + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) - mu_r12 = mu_erf * r12 + a_expo = expo * r2 + tmp = coef * expo * dexp(-a_expo) - j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf + fact_x += tmp * dx + fact_y += tmp * dy + fact_z += tmp * dz + fact_r += tmp * (3.d0 - 2.d0 * a_expo) + enddo - return -end function j12_mu + v_1b_square_grad(ipoint,1) = -2.d0 * fact_x + v_1b_square_grad(ipoint,2) = -2.d0 * fact_y + v_1b_square_grad(ipoint,3) = -2.d0 * fact_z + v_1b_square_lapl(ipoint) = -2.d0 * fact_r + enddo + +END_PROVIDER ! --- @@ -254,6 +352,19 @@ end function j12_mu_r12 ! --- +double precision function jmu_modif(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu, j12_nucl + + jmu_modif = j12_mu(r1, r2) * j12_nucl(r1, r2) + + return +end function jmu_modif + +! --- + double precision function j12_mu_gauss(r1, r2) implicit none @@ -278,30 +389,6 @@ end function j12_mu_gauss ! 
--- -double precision function j1b_nucl(r) - - implicit none - double precision, intent(in) :: r(3) - integer :: i - double precision :: a, d, e - - j1b_nucl = 1.d0 - - do i = 1, nucl_num - a = j1b_pen(i) - d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & - + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & - + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) - e = 1.d0 - exp(-a*d) - - j1b_nucl = j1b_nucl * e - enddo - - return -end function j1b_nucl - -! --- - double precision function j12_nucl(r1, r2) implicit none @@ -317,7 +404,7 @@ end function j12_nucl ! --------------------------------------------------------------------------------------- -double precision function grad_x_j1b_nucl(r) +double precision function grad_x_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -333,12 +420,12 @@ double precision function grad_x_j1b_nucl(r) r_eps(1) = r_eps(1) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_x_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_x_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_x_j1b_nucl +end function grad_x_j1b_nucl_num -double precision function grad_y_j1b_nucl(r) +double precision function grad_y_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -354,12 +441,12 @@ double precision function grad_y_j1b_nucl(r) r_eps(2) = r_eps(2) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_y_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_y_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_y_j1b_nucl +end function grad_y_j1b_nucl_num -double precision function grad_z_j1b_nucl(r) +double precision function grad_z_j1b_nucl_num(r) implicit none double precision, intent(in) :: r(3) @@ -375,10 +462,10 @@ double precision function grad_z_j1b_nucl(r) r_eps(3) = r_eps(3) - 2.d0 * delta fm = j1b_nucl(r_eps) - grad_z_j1b_nucl = 0.5d0 * (fp - fm) / delta + grad_z_j1b_nucl_num = 0.5d0 * (fp - fm) / delta return -end function grad_z_j1b_nucl +end function grad_z_j1b_nucl_num ! 
--------------------------------------------------------------------------------------- @@ -389,9 +476,9 @@ double precision function lapl_j1b_nucl(r) implicit none double precision, intent(in) :: r(3) double precision :: r_eps(3), eps, fp, fm, delta - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num eps = 1d-5 r_eps = r @@ -402,9 +489,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(1))) r_eps(1) = r_eps(1) + delta - fp = grad_x_j1b_nucl(r_eps) + fp = grad_x_j1b_nucl_num(r_eps) r_eps(1) = r_eps(1) - 2.d0 * delta - fm = grad_x_j1b_nucl(r_eps) + fm = grad_x_j1b_nucl_num(r_eps) r_eps(1) = r_eps(1) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -413,9 +500,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(2))) r_eps(2) = r_eps(2) + delta - fp = grad_y_j1b_nucl(r_eps) + fp = grad_y_j1b_nucl_num(r_eps) r_eps(2) = r_eps(2) - 2.d0 * delta - fm = grad_y_j1b_nucl(r_eps) + fm = grad_y_j1b_nucl_num(r_eps) r_eps(2) = r_eps(2) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -424,9 +511,9 @@ double precision function lapl_j1b_nucl(r) delta = max(eps, dabs(eps*r(3))) r_eps(3) = r_eps(3) + delta - fp = grad_z_j1b_nucl(r_eps) + fp = grad_z_j1b_nucl_num(r_eps) r_eps(3) = r_eps(3) - 2.d0 * delta - fm = grad_z_j1b_nucl(r_eps) + fm = grad_z_j1b_nucl_num(r_eps) r_eps(3) = r_eps(3) + delta lapl_j1b_nucl += 0.5d0 * (fp - fm) / delta @@ -574,35 +661,6 @@ end function grad1_z_j12_mu_num ! --------------------------------------------------------------------------------------- -! 
--- - -subroutine grad1_j12_mu_exc(r1, r2, grad) - - implicit none - double precision, intent(in) :: r1(3), r2(3) - double precision, intent(out) :: grad(3) - double precision :: dx, dy, dz, r12, tmp - - grad = 0.d0 - - dx = r1(1) - r2(1) - dy = r1(2) - r2(2) - dz = r1(3) - r2(3) - - r12 = dsqrt( dx * dx + dy * dy + dz * dz ) - if(r12 .lt. 1d-10) return - - tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 - - grad(1) = tmp * dx - grad(2) = tmp * dy - grad(3) = tmp * dz - - return -end subroutine grad1_j12_mu_exc - -! --- - subroutine grad1_jmu_modif_num(r1, r2, grad) implicit none @@ -614,11 +672,11 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) double precision, external :: j12_mu double precision, external :: j1b_nucl - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp0 = j1b_nucl(r1) tmp1 = j1b_nucl(r2) @@ -626,9 +684,9 @@ subroutine grad1_jmu_modif_num(r1, r2, grad) tmp3 = tmp0 * tmp1 tmp4 = tmp2 * tmp1 - grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl(r1) - grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl(r1) - grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl(r1) + grad(1) = tmp3 * grad_u12(1) + tmp4 * grad_x_j1b_nucl_num(r1) + grad(2) = tmp3 * grad_u12(2) + tmp4 * grad_y_j1b_nucl_num(r1) + grad(3) = tmp3 * grad_u12(3) + tmp4 * grad_z_j1b_nucl_num(r1) return end subroutine grad1_jmu_modif_num diff --git a/src/non_h_ints_mu/jast_deriv.irp.f b/src/non_h_ints_mu/jast_deriv.irp.f new file mode 100644 index 00000000..5e99600e --- /dev/null +++ b/src/non_h_ints_mu/jast_deriv.irp.f @@ -0,0 +1,851 @@ + +! 
--- + + BEGIN_PROVIDER [ double precision, grad1_u12_num, (n_points_extra_final_grid, n_points_final_grid, 3)] +&BEGIN_PROVIDER [ double precision, grad1_u12_squared_num, (n_points_extra_final_grid, n_points_final_grid)] + + BEGIN_DOC + ! + ! grad_1 u(r1,r2) + ! + ! this will be integrated numerically over r2: + ! we use grid for r1 and extra_grid for r2 + ! + ! for 99 < j1b_type < 199 + ! + ! u(r1,r2) = j12_mu(r12) x v(r1) x v(r2) + ! grad1 u(r1, r2) = [(grad1 j12_mu) v(r1) + j12_mu grad1 v(r1)] v(r2) + ! + END_DOC + + implicit none + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: v1b_r1, v1b_r2, u2b_r12 + double precision :: grad1_v1b(3), grad1_u2b(3) + double precision :: dx, dy, dz + double precision, external :: j12_mu, j1b_nucl + + PROVIDE j1b_type + PROVIDE final_grid_points_extra + + grad1_u12_num = 0.d0 + grad1_u12_squared_num = 0.d0 + + if(j1b_type .eq. 100) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .gt. 100) .and. (j1b_type .lt. 
200)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, v1b_r1, v1b_r2, u2b_r12, grad1_v1b, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + v1b_r1 = j1b_nucl(r1) + call grad1_j1b_nucl(r1, grad1_v1b) + + do jpoint = 1, n_points_extra_final_grid ! r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + v1b_r2 = j1b_nucl(r2) + u2b_r12 = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = (grad1_u2b(1) * v1b_r1 + u2b_r12 * grad1_v1b(1)) * v1b_r2 + dy = (grad1_u2b(2) * v1b_r1 + u2b_r12 * grad1_v1b(2)) * v1b_r2 + dz = (grad1_u2b(3) * v1b_r1 + u2b_r12 * grad1_v1b(3)) * v1b_r2 + + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, jpoint, r1, r2, grad1_u2b, dx, dy, dz) & + !$OMP SHARED (n_points_final_grid, n_points_extra_final_grid, final_grid_points, & + !$OMP final_grid_points_extra, grad1_u12_num, grad1_u12_squared_num) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + do jpoint = 1, n_points_extra_final_grid ! 
r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + call grad1_j12_mu(r1, r2, grad1_u2b) + + dx = grad1_u2b(1) + dy = grad1_u2b(2) + dz = grad1_u2b(3) + + grad1_u12_num(jpoint,ipoint,1) = dx + grad1_u12_num(jpoint,ipoint,2) = dy + grad1_u12_num(jpoint,ipoint,3) = dz + + grad1_u12_squared_num(jpoint,ipoint) = dx*dx + dy*dy + dz*dz + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + +END_PROVIDER + +! --- + +double precision function j12_mu(r1, r2) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision :: mu_tmp, r12 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 200)) then + + r12 = dsqrt( (r1(1) - r2(1)) * (r1(1) - r2(1)) & + + (r1(2) - r2(2)) * (r1(2) - r2(2)) & + + (r1(3) - r2(3)) * (r1(3) - r2(3)) ) + mu_tmp = mu_erf * r12 + + j12_mu = 0.5d0 * r12 * (1.d0 - derf(mu_tmp)) - inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / mu_erf + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j12_mu' + stop + + endif + + return +end function j12_mu + +! --- + +subroutine grad1_j12_mu(r1, r2, grad) + + BEGIN_DOC +! gradient of j(mu(r1,r2),r12) form of jastrow. +! +! if mu(r1,r2) = cst ---> j1b_type < 200 and +! +! d/dx1 j(mu,r12) = 0.5 * (1 - erf(mu *r12))/r12 * (x1 - x2) +! +! if mu(r1,r2) /= cst ---> 200 < j1b_type < 300 and +! +! d/dx1 j(mu(r1,r2),r12) = exp(-(mu(r1,r2)*r12)**2) /(2 *sqrt(pi) * mu(r1,r2)**2 ) d/dx1 mu(r1,r2) +! +! + 0.5 * (1 - erf(mu(r1,r2) *r12))/r12 * (x1 - x2) + END_DOC + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: dx, dy, dz, r12, tmp + + grad = 0.d0 + + if((j1b_type .ge. 0) .and. (j1b_type .lt. 
200)) then + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + if(r12 .lt. 1d-10) return + + tmp = 0.5d0 * (1.d0 - derf(mu_erf * r12)) / r12 + + grad(1) = tmp * dx + grad(2) = tmp * dy + grad(3) = tmp * dz + + elseif((j1b_type .ge. 200) .and. (j1b_type .lt. 300)) then + + double precision :: mu_val, mu_tmp, mu_der(3) + + dx = r1(1) - r2(1) + dy = r1(2) - r2(2) + dz = r1(3) - r2(3) + r12 = dsqrt(dx * dx + dy * dy + dz * dz) + + call mu_r_val_and_grad(r1, r2, mu_val, mu_der) + mu_tmp = mu_val * r12 + tmp = inv_sq_pi_2 * dexp(-mu_tmp*mu_tmp) / (mu_val * mu_val) + grad(1) = tmp * mu_der(1) + grad(2) = tmp * mu_der(2) + grad(3) = tmp * mu_der(3) + + if(r12 .lt. 1d-10) return + tmp = 0.5d0 * (1.d0 - derf(mu_tmp)) / r12 + grad(1) = grad(1) + tmp * dx + grad(2) = grad(2) + tmp * dy + grad(3) = grad(3) + tmp * dz + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + return +end subroutine grad1_j12_mu + +! --- + +double precision function j1b_nucl(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*dsqrt(d)) + enddo + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl = j1b_nucl * e + enddo + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 
104)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl = j1b_nucl - dexp(-a*d) + enddo + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl = j1b_nucl - dexp(-a*d*d) + enddo + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl' + stop + + endif + + return +end function j1b_nucl + +! --- + +double precision function j1b_nucl_square(r) + + implicit none + double precision, intent(in) :: r(3) + integer :: i + double precision :: a, d, e, x, y, z + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*dsqrt(d)) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 103)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + e = 1.d0 - dexp(-a*d) + j1b_nucl_square = j1b_nucl_square * e + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 
104)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + d = ( (r(1) - nucl_coord(i,1)) * (r(1) - nucl_coord(i,1)) & + + (r(2) - nucl_coord(i,2)) * (r(2) - nucl_coord(i,2)) & + + (r(3) - nucl_coord(i,3)) * (r(3) - nucl_coord(i,3)) ) + j1b_nucl_square = j1b_nucl_square - dexp(-a*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + j1b_nucl_square = 1.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + j1b_nucl_square = j1b_nucl_square - dexp(-a*d*d) + enddo + j1b_nucl_square = j1b_nucl_square * j1b_nucl_square + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for j1b_nucl_square' + stop + + endif + + return +end function j1b_nucl_square + +! --- + +subroutine grad1_j1b_nucl(r, grad) + + implicit none + double precision, intent(in) :: r(3) + double precision, intent(out) :: grad(3) + integer :: ipoint, i, j, phase + double precision :: x, y, z, dx, dy, dz + double precision :: a, d, e + double precision :: fact_x, fact_y, fact_z + double precision :: ax_der, ay_der, az_der, a_expo + + if((j1b_type .eq. 2) .or. (j1b_type .eq. 102)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = dsqrt(x*x + y*y + z*z) + e = a * dexp(-a*d) / d + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 
103)) then + + x = r(1) + y = r(2) + z = r(3) + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, List_all_comb_b2_size + + phase = 0 + a_expo = 0.d0 + ax_der = 0.d0 + ay_der = 0.d0 + az_der = 0.d0 + do j = 1, nucl_num + a = dble(List_all_comb_b2(j,i)) * j1b_pen(j) + dx = x - nucl_coord(j,1) + dy = y - nucl_coord(j,2) + dz = z - nucl_coord(j,3) + + phase += List_all_comb_b2(j,i) + a_expo += a * (dx*dx + dy*dy + dz*dz) + ax_der += a * dx + ay_der += a * dy + az_der += a * dz + enddo + e = -2.d0 * (-1.d0)**dble(phase) * dexp(-a_expo) + + fact_x += e * ax_der + fact_y += e * ay_der + fact_z += e * az_der + enddo + + grad(1) = fact_x + grad(2) = fact_y + grad(3) = fact_z + + elseif((j1b_type .eq. 4) .or. (j1b_type .eq. 104)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * dexp(-a*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 2.d0 * fact_x + grad(2) = 2.d0 * fact_y + grad(3) = 2.d0 * fact_z + + elseif((j1b_type .eq. 5) .or. (j1b_type .eq. 105)) then + + fact_x = 0.d0 + fact_y = 0.d0 + fact_z = 0.d0 + do i = 1, nucl_num + a = j1b_pen(i) + x = r(1) - nucl_coord(i,1) + y = r(2) - nucl_coord(i,2) + z = r(3) - nucl_coord(i,3) + d = x*x + y*y + z*z + e = a * d * dexp(-a*d*d) + + fact_x += e * x + fact_y += e * y + fact_z += e * z + enddo + + grad(1) = 4.d0 * fact_x + grad(2) = 4.d0 * fact_y + grad(3) = 4.d0 * fact_z + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented for grad1_j1b_nucl' + stop + + endif + + return +end subroutine grad1_j1b_nucl + +! 
--- + +subroutine mu_r_val_and_grad(r1, r2, mu_val, mu_der) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: mu_val, mu_der(3) + double precision :: r(3) + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + double precision :: dm_tot, tmp1, tmp2, tmp3 + double precision :: rho1, grad_rho1(3),rho2,rho_tot,inv_rho_tot + double precision :: f_rho1, f_rho2, d_drho_f_rho1 + double precision :: d_dx1_f_rho1(3),d_dx_rho_f_rho(3),nume + + if(j1b_type .eq. 200) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha sqrt(rho) + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! + + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp1 = dsqrt(dm_tot) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * tmp1 + tmp2 + + mu_der = 0.d0 + if(dm_tot .lt. 1d-7) return + + tmp3 = 0.25d0 * mu_r_ct / tmp1 - 0.5d0 * tmp2 + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 201) then + + ! + ! r = 0.5 (r1 + r2) + ! + ! mu[rho(r)] = alpha rho + mu0 exp(-rho) + ! + ! d mu[rho(r)] / dx1 = 0.5 d mu[rho(r)] / dx + ! d mu[rho(r)] / dx = [0.5 alpha / sqrt(rho) - mu0 exp(-rho)] (d rho(r) / dx) + ! 
+ + PROVIDE mu_r_ct mu_erf + + r(1) = 0.5d0 * (r1(1) + r2(1)) + r(2) = 0.5d0 * (r1(2) + r2(2)) + r(3) = 0.5d0 * (r1(3) + r2(3)) + + call density_and_grad_alpha_beta(r, dm_a, dm_b, grad_dm_a, grad_dm_b) + + dm_tot = dm_a(1) + dm_b(1) + tmp2 = mu_erf * dexp(-dm_tot) + + mu_val = mu_r_ct * dm_tot + tmp2 + + tmp3 = 0.5d0 * (mu_r_ct - tmp2) + mu_der(1) = tmp3 * (grad_dm_a(1,1) + grad_dm_b(1,1)) + mu_der(2) = tmp3 * (grad_dm_a(2,1) + grad_dm_b(2,1)) + mu_der(3) = tmp3 * (grad_dm_a(3,1) + grad_dm_b(3,1)) + + elseif(j1b_type .eq. 202) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 exp(-rho) + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! d/dx1 f[rho(r1)] = [0.5 alpha / sqrt(rho(r1)) - mu0 exp(-rho(r1))] (d rho(r1) / dx1) + ! + ! d/dx1 (rho(r1) f[rho(r1)] = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1) + + !!!!!!!!! rho1,rho2,rho1+rho2 + call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + rho_tot = rho1 + rho2 + if(rho_tot.lt.1.d-10)rho_tot = 1.d-10 + inv_rho_tot = 1.d0/rho_tot + ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf * exp(-rho) + call get_all_f_rho(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2) + d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3) + d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3) + nume = rho1 * f_rho1 + rho2 * f_rho2 + mu_val = nume * inv_rho_tot + mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume) + elseif(j1b_type .eq. 203) then + + ! mu(r1,r2) = {rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]} / RHO + ! + ! RHO = rho(r1) + rho(r2) + ! + ! f[rho] = alpha rho^beta + mu0 + ! + ! d/dx1 mu(r1,r2) = 1/RHO^2 * {RHO * d/dx1 (rho(r1) f[rho(r1)]) + ! - d/dx1 rho(r1) * [rho(r1) f[rho(r1)] + rho(r2) f[rho(r2)]] } + ! + ! 
d/dx1 f[rho(r1)] = [alpha beta rho(r1)^(beta-1)] (d rho(r1) / dx1)
+    !
+    ! d/dx1 (rho(r1) f[rho(r1)]) = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1)
+
+    !!!!!!!!! rho1,rho2,rho1+rho2
+    call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+    rho_tot = rho1 + rho2
+    if(rho_tot.lt.1.d-10)rho_tot = 1.d-10
+    inv_rho_tot = 1.d0/rho_tot
+    ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf
+    call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2)
+    d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3)
+    d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3)
+    nume = rho1 * f_rho1 + rho2 * f_rho2
+    mu_val = nume * inv_rho_tot
+    mu_der(1:3) = inv_rho_tot*inv_rho_tot * (rho_tot * d_dx_rho_f_rho(1:3) - grad_rho1(1:3) * nume)
+  elseif(j1b_type .eq. 204) then
+
+    ! mu(r1,r2) = 1/2 * (f[rho(r1)] + f[rho(r2)])
+    !
+    ! f[rho] = alpha rho^beta + mu0
+    !
+    ! d/dx1 mu(r1,r2) = 1/2 * d/dx1 (rho(r1) f[rho(r1)])
+    !
+    ! d/dx1 f[rho(r1)] = [alpha beta rho(r1)^(beta-1)] (d rho(r1) / dx1)
+    !
+    ! d/dx1 (rho(r1) f[rho(r1)]) = rho(r1) * d/dx1 f[rho(r1)] + f[rho(r1)] * d/dx1 rho(r1)
+
+    !!!!!!!!! rho1,rho2,rho1+rho2
+    call get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1)
+    rho_tot = rho1 + rho2
+    if(rho_tot.lt.1.d-10)rho_tot = 1.d-10
+    inv_rho_tot = 1.d0/rho_tot
+    ! f(rho) = mu_r_ct * rho**beta_rho_power + mu_erf
+    call get_all_f_rho_simple(rho1,rho2,mu_r_ct,mu_erf,beta_rho_power,f_rho1,d_drho_f_rho1,f_rho2)
+    d_dx1_f_rho1(1:3) = d_drho_f_rho1 * grad_rho1(1:3)
+    d_dx_rho_f_rho(1:3) = rho1 * d_dx1_f_rho1(1:3) + f_rho1 * grad_rho1(1:3)
+    mu_val = 0.5d0 * ( f_rho1 + f_rho2)
+    mu_der(1:3) = d_dx_rho_f_rho(1:3)
+  else
+    print *, ' j1b_type = ', j1b_type, 'not implemented yet'
+    stop
+
+  endif
+
+  return
+end subroutine mu_r_val_and_grad
+
+! 
--- + +subroutine grad1_j1b_nucl_square_num(r1, grad) + + implicit none + double precision, intent(in) :: r1(3) + double precision, intent(out) :: grad(3) + double precision :: r(3), eps, tmp_eps, vp, vm + double precision, external :: j1b_nucl_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j1b_nucl_square(r) + r(1) = r(1) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j1b_nucl_square(r) + r(2) = r(2) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j1b_nucl_square(r) + r(3) = r(3) - 2.d0 * eps + vm = j1b_nucl_square(r) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j1b_nucl_square_num + +! --- + +subroutine grad1_j12_mu_square_num(r1, r2, grad) + + include 'constants.include.F' + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, intent(out) :: grad(3) + double precision :: r(3) + double precision :: eps, tmp_eps, vp, vm + double precision, external :: j12_mu_square + + eps = 1d-5 + tmp_eps = 0.5d0 / eps + + r(1:3) = r1(1:3) + + r(1) = r(1) + eps + vp = j12_mu_square(r, r2) + r(1) = r(1) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(1) = r(1) + eps + grad(1) = tmp_eps * (vp - vm) + + r(2) = r(2) + eps + vp = j12_mu_square(r, r2) + r(2) = r(2) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(2) = r(2) + eps + grad(2) = tmp_eps * (vp - vm) + + r(3) = r(3) + eps + vp = j12_mu_square(r, r2) + r(3) = r(3) - 2.d0 * eps + vm = j12_mu_square(r, r2) + r(3) = r(3) + eps + grad(3) = tmp_eps * (vp - vm) + + return +end subroutine grad1_j12_mu_square_num + +! --- + +double precision function j12_mu_square(r1, r2) + + implicit none + double precision, intent(in) :: r1(3), r2(3) + double precision, external :: j12_mu + + j12_mu_square = j12_mu(r1, r2) * j12_mu(r1, r2) + + return +end function j12_mu_square + +! 
--- + +subroutine f_mu_and_deriv_mu(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 * exp(-rho) +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 * dexp(-rho) + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) - mu0 * dexp(-rho) + +end + + +subroutine get_all_rho_grad_rho(r1,r2,rho1,rho2,grad_rho1) + implicit none + BEGIN_DOC +! returns the density in r1,r2 and grad_rho at r1 + END_DOC + double precision, intent(in) :: r1(3),r2(3) + double precision, intent(out):: grad_rho1(3),rho1,rho2 + double precision :: dm_a(1), dm_b(1), grad_dm_a(3,1), grad_dm_b(3,1) + call density_and_grad_alpha_beta(r1, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho1 = dm_a(1) + dm_b(1) + grad_rho1(1:3) = grad_dm_a(1:3,1) + grad_dm_b(1:3,1) + call density_and_grad_alpha_beta(r2, dm_a, dm_b, grad_dm_a, grad_dm_b) + rho2 = dm_a(1) + dm_b(1) +end + +subroutine get_all_f_rho(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu(rho2,alpha,mu0,beta,f_rho2,tmp) +end + + +subroutine get_all_f_rho_simple(rho1,rho2,alpha,mu0,beta,f_rho1,d_drho_f_rho1,f_rho2) + implicit none + BEGIN_DOC +! 
returns the values f(mu(r1)), f(mu(r2)) and d/drho(1) f(mu(r1)) + END_DOC + double precision, intent(in) :: rho1,rho2,alpha,mu0,beta + double precision, intent(out):: f_rho1,d_drho_f_rho1,f_rho2 + double precision :: tmp + call f_mu_and_deriv_mu_simple(rho1,alpha,mu0,beta,f_rho1,d_drho_f_rho1) + call f_mu_and_deriv_mu_simple(rho2,alpha,mu0,beta,f_rho2,tmp) +end + +subroutine f_mu_and_deriv_mu_simple(rho,alpha,mu0,beta,f_mu,d_drho_f_mu) + implicit none + BEGIN_DOC +! function giving mu as a function of rho +! +! f_mu = alpha * rho**beta + mu0 +! +! and its derivative with respect to rho d_drho_f_mu + END_DOC + double precision, intent(in) :: rho,alpha,mu0,beta + double precision, intent(out) :: f_mu,d_drho_f_mu + f_mu = alpha * (rho)**beta + mu0 + d_drho_f_mu = alpha * beta * rho**(beta-1.d0) + +end + diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f index 754e1240..24e7e743 100644 --- a/src/non_h_ints_mu/new_grad_tc.irp.f +++ b/src/non_h_ints_mu/new_grad_tc.irp.f @@ -1,164 +1,68 @@ ! --- -BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] - - BEGIN_DOC - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12: - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) - ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! - ! if J(r1,r2) = u12 x v1 x v2 - ! - ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] - ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] - ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - ! 
- END_DOC - - implicit none - integer :: ipoint, i, j, m - double precision :: time0, time1 - double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - print*, ' providing int2_grad1_u12_ao ...' - call wall_time(time0) - - PROVIDE j1b_type - - if(read_tc_integ) then - - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read") - read(11) int2_grad1_u12_ao - close(11) - - else - - if(j1b_type .eq. 3) then - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - else - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - do j = 1, ao_num - do i = 1, ao_num - tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint) - - int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1) - int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2) - int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3) - enddo - enddo - enddo - int2_grad1_u12_ao *= 0.5d0 - endif - - endif - - if(write_tc_integ.and.mpi_master) then - open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") - call ezfio_set_work_empty(.False.) 
- write(11) int2_grad1_u12_ao - close(11) - call ezfio_set_tc_keywords_io_tc_integ('Read') - endif - - call wall_time(time1) - print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0 - -END_PROVIDER - -! --- - -BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] - - BEGIN_DOC - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) - ! - ! where r1 = r(ipoint) - ! - ! if J(r1,r2) = u12: - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) - ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] - ! = -int2_grad1_u12_ao(i,j,ipoint,:) - ! - ! if J(r1,r2) = u12 x v1 x v2 - ! - ! int1_grad2_u12_ao(:,i,j,ipoint) = v2 x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ] - ! - \grad_2 v2 x [ \int dr1 u12 v1 \phi_i(r1) \phi_j(r1) ] - ! = -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) - ! + 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) - ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) - ! - ! - END_DOC - - implicit none - integer :: ipoint, i, j - double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 - - PROVIDE j1b_type - - if(j1b_type .eq. 
3) then - - do ipoint = 1, n_points_final_grid - x = final_grid_points(1,ipoint) - y = final_grid_points(2,ipoint) - z = final_grid_points(3,ipoint) - - tmp0 = 0.5d0 * v_1b(ipoint) - tmp_x = v_1b_grad(1,ipoint) - tmp_y = v_1b_grad(2,ipoint) - tmp_z = v_1b_grad(3,ipoint) - - do j = 1, ao_num - do i = 1, ao_num - - tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) - tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) - - int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x - int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y - int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z - enddo - enddo - enddo - - else - - int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao - - endif - -END_PROVIDER +!BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) +! ! +! ! where r1 = r(ipoint) +! ! +! ! if J(r1,r2) = u12: +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1) +! ! = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] +! ! = -int2_grad1_u12_ao(i,j,ipoint,:) +! ! +! ! if J(r1,r2) = u12 x v1 x v2 +! ! +! ! int1_grad2_u12_ao(:,i,j,ipoint) = v2 x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ] +! ! - \grad_2 v2 x [ \int dr1 u12 v1 \phi_i(r1) \phi_j(r1) ] +! ! = -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) +! ! + 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) +! ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) +! ! +! ! +! END_DOC +! +! implicit none +! integer :: ipoint, i, j +! double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 +! +! PROVIDE j1b_type +! +! if(j1b_type .eq. 3) then +! +! 
do ipoint = 1, n_points_final_grid +! x = final_grid_points(1,ipoint) +! y = final_grid_points(2,ipoint) +! z = final_grid_points(3,ipoint) +! +! tmp0 = 0.5d0 * v_1b(ipoint) +! tmp_x = v_1b_grad(1,ipoint) +! tmp_y = v_1b_grad(2,ipoint) +! tmp_z = v_1b_grad(3,ipoint) +! +! do j = 1, ao_num +! do i = 1, ao_num +! +! tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) +! tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) +! +! int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x +! int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y +! int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z +! enddo +! enddo +! enddo +! +! else +! +! int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao +! +! endif +! +!END_PROVIDER ! --- @@ -288,7 +192,10 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ! ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij > ! - ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = -1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 (-1) \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) + ! + ! -1 in \int dr2 ! ! This is obtained by integration by parts. ! 
@@ -305,20 +212,14 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, if(read_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="read") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - read(11) tc_grad_and_lapl_ao(l,k,j,i) - enddo - enddo - enddo - enddo + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="read") + read(11) tc_grad_and_lapl_ao close(11) else + PROVIDE int2_grad1_u12_ao + allocate(b_mat(n_points_final_grid,ao_num,ao_num,3)) b_mat = 0.d0 @@ -350,10 +251,9 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid & , 1.d0, tc_grad_and_lapl_ao, ao_num*ao_num) - enddo deallocate(b_mat) - + call sum_A_At(tc_grad_and_lapl_ao(1,1,1,1), ao_num*ao_num) ! !$OMP PARALLEL & ! !$OMP DEFAULT (NONE) & @@ -374,18 +274,12 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, endif - if(write_tc_integ) then - open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="write") - do i = 1, ao_num - do j = 1, ao_num - do k = 1, ao_num - do l = 1, ao_num - write(11) tc_grad_and_lapl_ao(l,k,j,i) - enddo - enddo - enddo - enddo + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="write") + call ezfio_set_work_empty(.False.) 
+ write(11) tc_grad_and_lapl_ao close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') endif call wall_time(time1) diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f index 901e3048..7ab5b327 100644 --- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f +++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f @@ -39,7 +39,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_po read(11) int2_grad1_u12_ao_test close(11) - else if(j1b_type .eq. 3) then diff --git a/src/non_h_ints_mu/numerical_integ.irp.f b/src/non_h_ints_mu/numerical_integ.irp.f index dcd7a52a..f9457247 100644 --- a/src/non_h_ints_mu/numerical_integ.irp.f +++ b/src/non_h_ints_mu/numerical_integ.irp.f @@ -322,9 +322,9 @@ double precision function num_gradu_squared_u_ij_mu(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -342,11 +342,11 @@ double precision function num_gradu_squared_u_ij_mu(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -390,9 +390,9 @@ double precision function num_grad12_j12(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - 
double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -410,11 +410,11 @@ double precision function num_grad12_j12(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -456,9 +456,9 @@ double precision function num_u12sq_j1bsq(i, j, ipoint) double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -476,11 +476,11 @@ double precision function num_u12sq_j1bsq(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) @@ -522,9 +522,9 @@ double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint) 
double precision, external :: ao_value double precision, external :: j1b_nucl double precision, external :: j12_mu - double precision, external :: grad_x_j1b_nucl - double precision, external :: grad_y_j1b_nucl - double precision, external :: grad_z_j1b_nucl + double precision, external :: grad_x_j1b_nucl_num + double precision, external :: grad_y_j1b_nucl_num + double precision, external :: grad_z_j1b_nucl_num r1(1) = final_grid_points(1,ipoint) r1(2) = final_grid_points(2,ipoint) @@ -542,11 +542,11 @@ double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint) tmp_z = r1(3) - r2(3) r12 = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z) - dx1_v1 = grad_x_j1b_nucl(r1) - dy1_v1 = grad_y_j1b_nucl(r1) - dz1_v1 = grad_z_j1b_nucl(r1) + dx1_v1 = grad_x_j1b_nucl_num(r1) + dy1_v1 = grad_y_j1b_nucl_num(r1) + dz1_v1 = grad_z_j1b_nucl_num(r1) - call grad1_j12_mu_exc(r1, r2, grad_u12) + call grad1_j12_mu(r1, r2, grad_u12) tmp1 = 1.d0 - derf(mu_erf * r12) v1_tmp = j1b_nucl(r1) diff --git a/src/non_h_ints_mu/plot_mu_of_r.irp.f b/src/non_h_ints_mu/plot_mu_of_r.irp.f new file mode 100644 index 00000000..1100cd7c --- /dev/null +++ b/src/non_h_ints_mu/plot_mu_of_r.irp.f @@ -0,0 +1,33 @@ +program plot_mu_of_r + implicit none + read_wf = .False. 
+ touch read_wf + call routine_print + +end + + +subroutine routine_print + implicit none + character*(128) :: output + integer :: i_unit_output,getUnitAndOpen + output=trim(ezfio_filename)//'.mu_of_r' + i_unit_output = getUnitAndOpen(output,'w') + integer :: ipoint,nx + double precision :: xmax,xmin,r(3),dx + double precision :: mu_val, mu_der(3),dm_a,dm_b,grad + xmax = 5.D0 + xmin = -5.D0 + nx = 10000 + dx = (xmax - xmin)/dble(nx) + r = 0.d0 + r(1) = xmin + do ipoint = 1, nx + call mu_r_val_and_grad(r, r, mu_val, mu_der) + call dm_dft_alpha_beta_at_r(r,dm_a,dm_b) + grad = mu_der(1)**2 + mu_der(2)**2 + mu_der(3)**2 + grad = dsqrt(grad) + write(i_unit_output,'(100(F16.7,X))')r(1),mu_val,dm_a+dm_b,grad + r(1) += dx + enddo +end diff --git a/src/non_h_ints_mu/tc_integ.irp.f b/src/non_h_ints_mu/tc_integ.irp.f new file mode 100644 index 00000000..d5995ae5 --- /dev/null +++ b/src/non_h_ints_mu/tc_integ.irp.f @@ -0,0 +1,331 @@ + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)] + + BEGIN_DOC + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) + ! + ! where r1 = r(ipoint) + ! + ! if J(r1,r2) = u12 (j1b_type .eq. 1) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2) + ! = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ] + ! + ! if J(r1,r2) = u12 x v1 x v2 (j1b_type .eq. 3) + ! + ! int2_grad1_u12_ao(i,j,ipoint,:) = v1 x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ] + ! - \grad_1 v1 x [ \int dr2 u12 v2 \phi_i(r2) \phi_j(r2) ] + ! = 0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) + ! - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) + ! - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint) + ! 
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, m, jpoint
+  double precision :: time0, time1
+  double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(read_tc_integ) then
+
+    open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="read")
+    read(11) int2_grad1_u12_ao
+    close(11)
+
+  else
+
+    if(j1b_type .eq. 0) then
+
+      PROVIDE v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu
+
+      int2_grad1_u12_ao = 0.d0
+      !$OMP PARALLEL                                                              &
+      !$OMP DEFAULT (NONE)                                                        &
+      !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp1)                                 &
+      !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points              &
+      !$OMP        , v_ij_erf_rk_cst_mu, x_v_ij_erf_rk_cst_mu, int2_grad1_u12_ao)
+      !$OMP DO SCHEDULE (static)
+      do ipoint = 1, n_points_final_grid
+        x = final_grid_points(1,ipoint)
+        y = final_grid_points(2,ipoint)
+        z = final_grid_points(3,ipoint)
+        do j = 1, ao_num
+          do i = 1, ao_num
+            tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+            int2_grad1_u12_ao(i,j,ipoint,1) = 0.5d0 * (tmp1 * x - x_v_ij_erf_rk_cst_mu(i,j,ipoint,1))
+            int2_grad1_u12_ao(i,j,ipoint,2) = 0.5d0 * (tmp1 * y - x_v_ij_erf_rk_cst_mu(i,j,ipoint,2))
+            int2_grad1_u12_ao(i,j,ipoint,3) = 0.5d0 * (tmp1 * z - x_v_ij_erf_rk_cst_mu(i,j,ipoint,3))
+          enddo
+        enddo
+      enddo
+      !$OMP END DO
+      !$OMP END PARALLEL
+
+    elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 
4)) then + + PROVIDE v_1b_grad v_ij_erf_rk_cst_mu_j1b v_ij_u_cst_mu_j1b x_v_ij_erf_rk_cst_mu_j1b + + int2_grad1_u12_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, x, y, z, tmp0, tmp1, tmp2, tmp_x, tmp_y, tmp_z) & + !$OMP SHARED ( ao_num, n_points_final_grid, final_grid_points, v_1b, v_1b_grad & + !$OMP , v_ij_erf_rk_cst_mu_j1b, v_ij_u_cst_mu_j1b, x_v_ij_erf_rk_cst_mu_j1b, int2_grad1_u12_ao) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + x = final_grid_points(1,ipoint) + y = final_grid_points(2,ipoint) + z = final_grid_points(3,ipoint) + tmp0 = 0.5d0 * v_1b(ipoint) + tmp_x = v_1b_grad(1,ipoint) + tmp_y = v_1b_grad(2,ipoint) + tmp_z = v_1b_grad(3,ipoint) + do j = 1, ao_num + do i = 1, ao_num + tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) + tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint) + int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x + int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y + int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif(j1b_type .ge. 
100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + int2_grad1_u12_ao = 0.d0 + do m = 1, 3 + !call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, +1.d0 & + ! this work also because of the symmetry in K(1,2) and sign compensation in L(1,2,3) + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -1.d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_num(1,1,m), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num) + enddo + + !! these dgemm are equivalent to + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (j, i, ipoint, jpoint, w) & + !!$OMP SHARED (int2_grad1_u12_ao, ao_num, n_points_final_grid, & + !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & + !!$OMP aos_in_r_array_extra_transp, grad1_u12_num, tmp) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do jpoint = 1, n_points_extra_final_grid + ! w = -tmp(jpoint,i,j) + ! !w = tmp(jpoint,i,j) this work also because of the symmetry in K(1,2) + ! ! and sign compensation in L(1,2,3) + ! int2_grad1_u12_ao(i,j,ipoint,1) += w * grad1_u12_num(jpoint,ipoint,1) + ! int2_grad1_u12_ao(i,j,ipoint,2) += w * grad1_u12_num(jpoint,ipoint,2) + ! 
int2_grad1_u12_ao(i,j,ipoint,3) += w * grad1_u12_num(jpoint,ipoint,3) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + + deallocate(tmp) + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + endif + + if(write_tc_integ.and.mpi_master) then + open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/int2_grad1_u12_ao', action="write") + call ezfio_set_work_empty(.False.) + write(11) int2_grad1_u12_ao + close(11) + call ezfio_set_tc_keywords_io_tc_integ('Read') + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_ao =', time1-time0 + +END_PROVIDER + +! --- + +BEGIN_PROVIDER [double precision, int2_grad1_u12_square_ao, (ao_num, ao_num, n_points_final_grid)] + + BEGIN_DOC + ! + ! int2_grad1_u12_square_ao = -(1/2) x int dr2 chi_l(r2) chi_j(r2) [grad_1 u(r1,r2)]^2 + ! + END_DOC + + implicit none + integer :: ipoint, i, j, m, jpoint + double precision :: time0, time1 + double precision :: x, y, z, w, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2 + + print*, ' providing int2_grad1_u12_square_ao ...' + call wall_time(time0) + + PROVIDE j1b_type + + if(j1b_type .eq. 0) then + + PROVIDE int2_grad1u2_grad2u2 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, int2_grad1u2_grad2u2) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = int2_grad1u2_grad2u2(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + elseif((j1b_type .eq. 3) .or. (j1b_type .eq. 4)) then + + if(use_ipp) then + + ! 
the term u12_grad1_u12_j1b_grad1_j1b is added directly for performance + PROVIDE u12sq_j1bsq grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + else + + PROVIDE u12sq_j1bsq u12_grad1_u12_j1b_grad1_j1b grad12_j12 + + int2_grad1_u12_square_ao = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, j, ipoint) & + !$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, u12sq_j1bsq, grad12_j12, u12_grad1_u12_j1b_grad1_j1b) + !$OMP DO SCHEDULE (static) + do ipoint = 1, n_points_final_grid + do j = 1, ao_num + do i = 1, ao_num + int2_grad1_u12_square_ao(i,j,ipoint) = u12sq_j1bsq(i,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) + 0.5d0 * grad12_j12(i,j,ipoint) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + endif + + elseif(j1b_type .ge. 
100) then + + PROVIDE final_weight_at_r_vector_extra aos_in_r_array_extra + PROVIDE grad1_u12_squared_num + + double precision, allocatable :: tmp(:,:,:) + allocate(tmp(n_points_extra_final_grid,ao_num,ao_num)) + tmp = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (j, i, jpoint) & + !$OMP SHARED (tmp, ao_num, n_points_extra_final_grid, final_weight_at_r_vector_extra, aos_in_r_array_extra_transp) + !$OMP DO SCHEDULE (static) + do j = 1, ao_num + do i = 1, ao_num + do jpoint = 1, n_points_extra_final_grid + tmp(jpoint,i,j) = final_weight_at_r_vector_extra(jpoint) * aos_in_r_array_extra_transp(jpoint,i) * aos_in_r_array_extra_transp(jpoint,j) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + int2_grad1_u12_square_ao = 0.d0 + call dgemm( "T", "N", ao_num*ao_num, n_points_final_grid, n_points_extra_final_grid, -0.5d0 & + , tmp(1,1,1), n_points_extra_final_grid, grad1_u12_squared_num(1,1), n_points_extra_final_grid & + , 0.d0, int2_grad1_u12_square_ao(1,1,1), ao_num*ao_num) + + !! this dgemm is equivalen to + !!$OMP PARALLEL & + !!$OMP DEFAULT (NONE) & + !!$OMP PRIVATE (i, j, ipoint, jpoint, w) & + !!$OMP SHARED (int2_grad1_u12_square_ao, ao_num, n_points_final_grid, & + !!$OMP n_points_extra_final_grid, final_weight_at_r_vector_extra, & + !!$OMP aos_in_r_array_extra_transp, grad1_u12_squared_num, tmp) + !!$OMP DO SCHEDULE (static) + !do ipoint = 1, n_points_final_grid + ! do j = 1, ao_num + ! do i = 1, ao_num + ! do jpoint = 1, n_points_extra_final_grid + ! w = -0.5d0 * tmp(jpoint,i,j) + ! int2_grad1_u12_square_ao(i,j,ipoint) += w * grad1_u12_squared_num(jpoint,ipoint) + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + !!$OMP END PARALLEL + + deallocate(tmp) + + else + + print *, ' j1b_type = ', j1b_type, 'not implemented yet' + stop + + endif + + call wall_time(time1) + print*, ' wall time for int2_grad1_u12_square_ao =', time1-time0 + +END_PROVIDER + +! 
--- + diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f index c535d0c5..a6e0a311 100644 --- a/src/non_h_ints_mu/test_non_h_ints.irp.f +++ b/src/non_h_ints_mu/test_non_h_ints.irp.f @@ -1,15 +1,25 @@ program test_non_h - implicit none + + implicit none + my_grid_becke = .True. my_n_pt_r_grid = 50 my_n_pt_a_grid = 74 + !my_n_pt_r_grid = 400 + !my_n_pt_a_grid = 974 + ! my_n_pt_r_grid = 10 ! small grid for quick debug ! my_n_pt_a_grid = 26 ! small grid for quick debug touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid -!call routine_grad_squared - call routine_fit + + !call routine_grad_squared + !call routine_fit + + call test_ipp() end +! --- + subroutine routine_lapl_grad implicit none integer :: i,j,k,l @@ -100,3 +110,445 @@ subroutine routine_fit enddo end + + +subroutine test_ipp() + + implicit none + integer :: i, j, k, l, ipoint + double precision :: accu, norm, diff, old, new, eps, int_num + double precision :: weight1, ao_i_r, ao_k_r + double precision, allocatable :: b_mat(:,:,:), I1(:,:,:,:), I2(:,:,:,:) + + eps = 1d-7 + + allocate(b_mat(n_points_final_grid,ao_num,ao_num)) + b_mat = 0.d0 + + ! --- + + ! first way + + allocate(I1(ao_num,ao_num,ao_num,ao_num)) + I1 = 0.d0 + + PROVIDE u12_grad1_u12_j1b_grad1_j1b + + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , u12_grad1_u12_j1b_grad1_j1b(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 0.d0, I1, ao_num*ao_num) + + ! --- + + ! 
2nd way + + allocate(I2(ao_num,ao_num,ao_num,ao_num)) + I2 = 0.d0 + + PROVIDE int2_u2_j1b2 + + b_mat = 0.d0 + !$OMP PARALLEL & + !$OMP DEFAULT (NONE) & + !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r) & + !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector, & + !$OMP v_1b_square_grad, v_1b_square_lapl, aos_grad_in_r_array_transp_bis) + !$OMP DO SCHEDULE (static) + do i = 1, ao_num + do k = 1, ao_num + do ipoint = 1, n_points_final_grid + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) + + ao_i_r = aos_in_r_array_transp(ipoint,i) + ao_k_r = aos_in_r_array_transp(ipoint,k) + + b_mat(ipoint,k,i) = weight1 * ( ao_k_r * ao_i_r * v_1b_square_lapl(ipoint) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) + ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + enddo + enddo + enddo + !$OMP END DO + !$OMP END PARALLEL + + call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 & + , int2_u2_j1b2(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid & + , 0.d0, I2, ao_num*ao_num) + + ! --- + + deallocate(b_mat) + + accu = 0.d0 + norm = 0.d0 + do i = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + do j = 1, ao_num + + old = I1(j,l,k,i) + new = I2(j,l,k,i) + + !print*, l, k, j, i + !print*, old, new + + diff = new - old + if(dabs(diff) .gt. 
eps) then + print*, ' problem on :', j, l, k, i + print*, ' diff = ', diff + print*, ' old value = ', old + print*, ' new value = ', new + call I_grade_gradu_naive1(i, j, k, l, int_num) + print*, ' full num1 = ', int_num + call I_grade_gradu_naive2(i, j, k, l, int_num) + print*, ' full num2 = ', int_num + call I_grade_gradu_naive3(i, j, k, l, int_num) + print*, ' full num3 = ', int_num + call I_grade_gradu_naive4(i, j, k, l, int_num) + print*, ' full num4 = ', int_num + call I_grade_gradu_seminaive(i, j, k, l, int_num) + print*, ' semi num = ', int_num + endif + + accu += dabs(diff) + norm += dabs(old) + enddo + enddo + enddo + enddo + + deallocate(I1, I2) + + print*, ' accu = ', accu + print*, ' norm = ', norm + + return +end subroutine test_ipp + +! --- + +subroutine I_grade_gradu_naive1(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1_x, weight1_y, weight1_z + double precision :: weight2_x, weight2_y, weight2_z + double precision :: aor_i, aor_j, aor_k, aor_l + double precision :: e1_val, e2_val, e1_der(3), u12_val, u12_der(3) + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + e1_val = j1b_nucl(r1) + call grad1_j1b_nucl(r1, e1_der) + + weight1_x = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(1) + weight1_y = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(2) + weight1_z = aor_i * aor_k * e1_val * final_weight_at_r_vector(ipoint) * e1_der(3) + + do jpoint = 1, n_points_extra_final_grid ! 
r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + + u12_val = j12_mu(r1, r2) + call grad1_j12_mu(r1, r2, u12_der) + + weight2_x = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(1) + weight2_y = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(2) + weight2_z = aor_j * aor_l * e2_val * e2_val * u12_val * final_weight_at_r_vector_extra(jpoint) * u12_der(3) + + int = int - (weight1_x * weight2_x + weight1_y * weight2_y + weight1_z * weight2_z) + enddo + enddo + + return +end subroutine I_grade_gradu_naive1 + +! --- + +subroutine I_grade_gradu_naive2(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1_x, weight1_y, weight1_z + double precision :: weight2_x, weight2_y, weight2_z + double precision :: aor_i, aor_j, aor_k, aor_l + double precision :: e1_square_der(3), e2_val, u12_square_der(3) + double precision, external :: j1b_nucl + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + call grad1_j1b_nucl_square_num(r1, e1_square_der) + + weight1_x = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(1) + weight1_y = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(2) + weight1_z = aor_i * aor_k * final_weight_at_r_vector(ipoint) * e1_square_der(3) + + do jpoint = 1, n_points_extra_final_grid ! 
r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + call grad1_j12_mu_square_num(r1, r2, u12_square_der) + + weight2_x = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(1) + weight2_y = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(2) + weight2_z = aor_j * aor_l * e2_val * e2_val * final_weight_at_r_vector_extra(jpoint) * u12_square_der(3) + + int = int - 0.25d0 * (weight1_x * weight2_x + weight1_y * weight2_y + weight1_z * weight2_z) + enddo + enddo + + return +end subroutine I_grade_gradu_naive2 + +! --- + +subroutine I_grade_gradu_naive3(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1, weight2 + double precision :: aor_j, aor_l + double precision :: grad(3), e2_val, u12_val + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + call grad1_aos_ik_grad1_esquare(i, k, r1, grad) + + weight1 = final_weight_at_r_vector(ipoint) * (grad(1) + grad(2) + grad(3)) + + do jpoint = 1, n_points_extra_final_grid ! 
r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + u12_val = j12_mu(r1, r2) + + weight2 = aor_j * aor_l * e2_val * e2_val * u12_val * u12_val * final_weight_at_r_vector_extra(jpoint) + + int = int + 0.25d0 * weight1 * weight2 + enddo + enddo + + return +end subroutine I_grade_gradu_naive3 + +! --- + +subroutine I_grade_gradu_naive4(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint, jpoint + double precision :: r1(3), r2(3) + double precision :: weight1, weight2 + double precision :: aor_j, aor_l, aor_k, aor_i + double precision :: grad(3), e2_val, u12_val + double precision, external :: j1b_nucl, j12_mu + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + r1(1) = final_grid_points(1,ipoint) + r1(2) = final_grid_points(2,ipoint) + r1(3) = final_grid_points(3,ipoint) + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + weight1 = final_weight_at_r_vector(ipoint) * ( aor_k * aor_i * v_1b_square_lapl(ipoint) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,1) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,2) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,3) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + + do jpoint = 1, n_points_extra_final_grid ! 
r2 + + r2(1) = final_grid_points_extra(1,jpoint) + r2(2) = final_grid_points_extra(2,jpoint) + r2(3) = final_grid_points_extra(3,jpoint) + + aor_j = aos_in_r_array_extra_transp(jpoint,j) + aor_l = aos_in_r_array_extra_transp(jpoint,l) + + e2_val = j1b_nucl(r2) + u12_val = j12_mu(r1, r2) + + weight2 = aor_j * aor_l * e2_val * e2_val * u12_val * u12_val * final_weight_at_r_vector_extra(jpoint) + + int = int + 0.25d0 * weight1 * weight2 + enddo + enddo + + return +end subroutine I_grade_gradu_naive4 + +! --- + +subroutine I_grade_gradu_seminaive(i, j, k, l, int) + + implicit none + integer, intent(in) :: i, j, k, l + double precision, intent(out) :: int + integer :: ipoint + double precision :: r1(3) + double precision :: weight1 + double precision :: aor_i, aor_k + + int = 0.d0 + + do ipoint = 1, n_points_final_grid ! r1 + + aor_i = aos_in_r_array_transp(ipoint,i) + aor_k = aos_in_r_array_transp(ipoint,k) + + weight1 = 0.25d0 * final_weight_at_r_vector(ipoint) * ( aor_k * aor_i * v_1b_square_lapl(ipoint) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,1) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,1)) * v_1b_square_grad(ipoint,1) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,2) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,2)) * v_1b_square_grad(ipoint,2) & + + (aor_k * aos_grad_in_r_array_transp_bis(ipoint,i,3) + aor_i * aos_grad_in_r_array_transp_bis(ipoint,k,3)) * v_1b_square_grad(ipoint,3) ) + + int = int + weight1 * int2_u2_j1b2(j,l,ipoint) + enddo + + return +end subroutine I_grade_gradu_seminaive + +! 
---
+
+subroutine aos_ik_grad1_esquare(i, k, r1, val)
+
+  implicit none
+  integer,          intent(in)  :: i, k
+  double precision, intent(in)  :: r1(3)
+  double precision, intent(out) :: val(3)
+  double precision              :: tmp
+  double precision              :: der(3), aos_array(ao_num), aos_grad_array(3,ao_num)
+
+  call give_all_aos_and_grad_at_r(r1, aos_array, aos_grad_array)
+  call grad1_j1b_nucl_square_num(r1, der)
+
+  tmp = aos_array(i) * aos_array(k)
+  val(1) = tmp * der(1)
+  val(2) = tmp * der(2)
+  val(3) = tmp * der(3)
+
+  return
+end subroutine aos_ik_grad1_esquare
+
+! ---
+
+subroutine grad1_aos_ik_grad1_esquare(i, k, r1, grad)
+
+  implicit none
+  integer,          intent(in)  :: i, k
+  double precision, intent(in)  :: r1(3)
+  double precision, intent(out) :: grad(3)
+  double precision              :: r(3), eps, tmp_eps, val_p(3), val_m(3)
+
+  eps     = 1d-5
+  tmp_eps = 0.5d0 / eps
+
+  r(1:3) = r1(1:3)
+
+  r(1) = r(1) + eps
+  call aos_ik_grad1_esquare(i, k, r, val_p)
+  r(1) = r(1) - 2.d0 * eps
+  call aos_ik_grad1_esquare(i, k, r, val_m)
+  r(1) = r(1) + eps
+  grad(1) = tmp_eps * (val_p(1) - val_m(1))
+
+  r(2) = r(2) + eps
+  call aos_ik_grad1_esquare(i, k, r, val_p)
+  r(2) = r(2) - 2.d0 * eps
+  call aos_ik_grad1_esquare(i, k, r, val_m)
+  r(2) = r(2) + eps
+  grad(2) = tmp_eps * (val_p(2) - val_m(2))
+
+  r(3) = r(3) + eps
+  call aos_ik_grad1_esquare(i, k, r, val_p)
+  r(3) = r(3) - 2.d0 * eps
+  call aos_ik_grad1_esquare(i, k, r, val_m)
+  r(3) = r(3) + eps
+  grad(3) = tmp_eps * (val_p(3) - val_m(3))
+
+  return
+end subroutine grad1_aos_ik_grad1_esquare
+
+! ---
+
+
+
+
+
+
+
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 4f8dc74d..450bbef0 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -11,6 +11,13 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num,
   call wall_time(wall0)
 
   if(test_cycle_tc) then
+
+    PROVIDE j1b_type
+    if(j1b_type .ne. 
3) then + print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type + stop + endif + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -20,7 +27,9 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, enddo enddo enddo + else + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -30,6 +39,7 @@ BEGIN_PROVIDER [double precision, ao_vartc_int_chemist, (ao_num, ao_num, ao_num, enddo enddo enddo + endif call wall_time(wall1) @@ -48,9 +58,20 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao print *, ' providing ao_tc_int_chemist ...' call wall_time(wall0) - if(test_cycle_tc)then + if(test_cycle_tc) then + + PROVIDE j1b_type + if(j1b_type .ne. 3) then + print*, ' TC integrals with cycle can not be used for j1b_type =', j1b_type + stop + endif + ao_tc_int_chemist = ao_tc_int_chemist_test + else + + PROVIDE tc_grad_square_ao tc_grad_and_lapl_ao ao_two_e_coul + do j = 1, ao_num do l = 1, ao_num do i = 1, ao_num @@ -68,27 +89,34 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao END_PROVIDER -BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] ! --- + +BEGIN_PROVIDER [double precision, ao_tc_int_chemist_no_cycle, (ao_num, ao_num, ao_num, ao_num)] + implicit none integer :: i, j, k, l double precision :: wall1, wall0 + print *, ' providing ao_tc_int_chemist_no_cycle ...' call wall_time(wall0) - do j = 1, ao_num - do l = 1, ao_num - do i = 1, ao_num - do k = 1, ao_num - ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) -! 
ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) - enddo + + do j = 1, ao_num + do l = 1, ao_num + do i = 1, ao_num + do k = 1, ao_num + ao_tc_int_chemist_no_cycle(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j) + !ao_tc_int_chemist(k,i,l,j) = ao_two_e_coul(k,i,l,j) enddo enddo enddo + enddo + call wall_time(wall1) print *, ' wall time for ao_tc_int_chemist_no_cycle ', wall1 - wall0 END_PROVIDER +! --- + BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)] implicit none diff --git a/src/tc_bi_ortho/dressing_vectors_lr.irp.f b/src/tc_bi_ortho/dressing_vectors_lr.irp.f index 08913bab..ed663f02 100644 --- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f +++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f @@ -27,7 +27,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) call hmat_bi_ortho (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) delta = 0.d0 @@ -39,7 +39,7 @@ subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) ! 
< I | H | J > call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot) @@ -78,7 +78,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) i = 1 j = 1 - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -88,7 +88,7 @@ subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta) do j = 1, ndet ! < I | Htilde | J > - call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) + call htilde_mu_mat_bi_ortho_slow(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot) delta(i) = delta(i) + psicoef(j) * htc_tot enddo diff --git a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f index ec66a8b5..6d5c3b21 100644 --- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f +++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f @@ -2,7 +2,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_00] implicit none double precision :: hmono,htwoe,hthree,htot - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,htot) e_tilde_00 = htot END_PROVIDER @@ -18,11 +18,11 @@ do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. 
degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_pt2_tc_bi_orth += coef_pt1 * htilde_ij if(degree == 1)then e_pt2_tc_bi_orth_single += coef_pt1 * htilde_ij @@ -37,7 +37,7 @@ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00] implicit none double precision :: hmono,htwoe,hthree,htilde_ij - call htilde_mu_mat_bi_ortho(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,HF_bitmask,N_int,hmono,htwoe,hthree,e_tilde_bi_orth_00) e_tilde_bi_orth_00 += nuclear_repulsion END_PROVIDER @@ -45,6 +45,9 @@ &BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj ] &BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth ] &BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth ] +&BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj_abs ] +&BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth_abs ] +&BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth_abs ] implicit none integer :: i,degree double precision :: hmono,htwoe,hthree,htilde_ij @@ -54,16 +57,18 @@ e_corr_double_bi_orth = 0.d0 do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) if(degree == 1)then e_corr_single_bi_orth += reigvec_tc_bi_orth(i,1) 
* htilde_ij/reigvec_tc_bi_orth(1,1) + e_corr_single_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) else if(degree == 2)then e_corr_double_bi_orth += reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) -! print*,'coef_wf , e_cor',reigvec_tc_bi_orth(i,1)/reigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1) + e_corr_double_bi_orth_abs += dabs(reigvec_tc_bi_orth(i,1) * htilde_ij/reigvec_tc_bi_orth(1,1)) endif enddo e_corr_bi_orth_proj = e_corr_single_bi_orth + e_corr_double_bi_orth e_corr_bi_orth = eigval_right_tc_bi_orth(1) - e_tilde_bi_orth_00 + e_corr_bi_orth_proj_abs = e_corr_single_bi_orth_abs + e_corr_double_bi_orth_abs END_PROVIDER BEGIN_PROVIDER [ double precision, e_tc_left_right ] @@ -75,7 +80,7 @@ do i = 1, N_det accu += reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(i,1) do j = 1, N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j),psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) e_tc_left_right += htilde_ij * reigvec_tc_bi_orth(i,1) * leigvec_tc_bi_orth(j,1) enddo enddo @@ -94,8 +99,8 @@ BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)] if(degree==0)then coef_pt1_bi_ortho(i) = 1.d0 else - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e coef_pt1_bi_ortho(i)= coef_pt1 diff --git a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f index b7129d36..1d1b26cc 100644 --- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f +++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f @@ -1,4 +1,4 
@@ -subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htc_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -27,7 +27,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) & @@ -36,7 +36,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo @@ -45,7 +45,7 @@ subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze) end -subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) +subroutine htcdag_bi_ortho_calc_tdav_slow(v, u, N_st, sze) use bitmasks @@ -71,7 +71,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) i = 1 j = 1 - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, htot) v = 0.d0 @@ -81,7 +81,7 @@ subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze) do istate = 1, N_st do i = 1, sze do j = 1, sze - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) v(i,istate) = v(i,istate) + htot * u(j,istate) enddo enddo diff --git a/src/tc_bi_ortho/h_tc_s2_u0.irp.f b/src/tc_bi_ortho/h_tc_s2_u0.irp.f index b9b85a96..c767f090 100644 --- a/src/tc_bi_ortho/h_tc_s2_u0.irp.f +++ b/src/tc_bi_ortho/h_tc_s2_u0.irp.f @@ -1,7 +1,6 @@ -subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) - use bitmasks - implicit none +subroutine get_H_tc_s2_l0_r0(l_0, r_0, N_st, sze, energies, s2) + BEGIN_DOC ! Computes $e_0 = \langle l_0 | H | r_0\rangle$. ! 
@@ -11,26 +10,34 @@ subroutine get_H_tc_s2_l0_r0(l_0,r_0,N_st,sze,energies, s2) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: l_0(sze,N_st), r_0(sze,N_st) - double precision, intent(out) :: energies(N_st), s2(N_st) - logical :: do_right - integer :: istate + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: l_0(sze,N_st), r_0(sze,N_st) + double precision, intent(out) :: energies(N_st), s2(N_st) + logical :: do_right + integer :: istate double precision, allocatable :: s_0(:,:), v_0(:,:) - double precision :: u_dot_v, norm + double precision :: u_dot_v, norm + allocate(s_0(sze,N_st), v_0(sze,N_st)) do_right = .True. - call H_tc_s2_u_0_opt(v_0,s_0,r_0,N_st,sze) + call H_tc_s2_u_0_opt(v_0, s_0, r_0, N_st, sze) + do istate = 1, N_st - norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) - energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm - s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm + norm = u_dot_v(l_0(1,istate),r_0(1,istate),sze) + energies(istate) = u_dot_v(l_0(1,istate),v_0(1,istate),sze)/norm + s2(istate) = u_dot_v(l_0(1,istate),s_0(1,istate),sze)/norm enddo + end -subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) - use bitmasks - implicit none +! --- + +subroutine H_tc_s2_u_0_opt(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -38,16 +45,24 @@ subroutine H_tc_s2_u_0_opt(v_0,s_0,u_0,N_st,sze) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + do_right = .True. 
- call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) + call H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + end -subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) - use bitmasks - implicit none +! --- + +subroutine H_tc_s2_dagger_u_0_opt(v_0, s_0, u_0, N_st, sze) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -55,17 +70,23 @@ subroutine H_tc_s2_dagger_u_0_opt(v_0,s_0,u_0,N_st,sze) ! ! istart, iend, ishift, istep are used in ZMQ parallelization. END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical :: do_right - do_right = .False. - call H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) -end - -subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) use bitmasks implicit none + integer, intent(in) :: N_st,sze + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + logical :: do_right + + do_right = .False. + call H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + +end + +! --- + +subroutine H_tc_s2_u_0_nstates_openmp(v_0, s_0, u_0, N_st, sze, do_right) + BEGIN_DOC ! Computes $v_0 = H | u_0\rangle$. ! @@ -75,12 +96,18 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) ! ! 
if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi> END_DOC - integer, intent(in) :: N_st,sze - double precision, intent(inout) :: v_0(sze,N_st), u_0(sze,N_st), s_0(sze,N_st) - logical, intent(in) :: do_right - integer :: k - double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) + + use bitmasks + implicit none + + integer, intent(in) :: N_st,sze + logical, intent(in) :: do_right + double precision, intent(in) :: u_0(sze,N_st) + double precision, intent(out) :: v_0(sze,N_st), s_0(sze,N_st) + integer :: k + double precision, allocatable :: u_t(:,:), v_t(:,:), s_t(:,:) !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t + allocate(u_t(N_st,N_det),v_t(N_st,N_det),s_t(N_st,N_det)) do k=1,N_st call dset_order(u_0(1,k),psi_bilinear_matrix_order,N_det) @@ -119,6 +146,7 @@ subroutine H_tc_s2_u_0_nstates_openmp(v_0,s_0,u_0,N_st,sze, do_right) end +! --- subroutine H_tc_s2_u_0_nstates_openmp_work(v_t,s_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right) use bitmasks diff --git a/src/tc_bi_ortho/normal_ordered.irp.f b/src/tc_bi_ortho/normal_ordered.irp.f index 81f5fb2c..8adc7a63 100644 --- a/src/tc_bi_ortho/normal_ordered.irp.f +++ b/src/tc_bi_ortho/normal_ordered.irp.f @@ -48,10 +48,16 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ h2 = list_act(hh2) do pp2 = 1, n_act_orb p2 = list_act(pp2) - ! opposite spin double excitations + ! all contributions from the 3-e terms to the double excitations + ! s1:(h1-->p1), s2:(h2-->p2) from the HF reference determinant + + + ! opposite spin double excitations : s1 /= s2 call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba) - ! same spin double excitations with opposite spin contributions + + ! same spin double excitations : s1 == s2 if(h1h2 ! same spin double excitations with same spin contributions if(Ne(2).ge.3)then @@ -60,8 +66,10 @@ BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_ hthree_aaa = 0.d0 endif else + ! 
with opposite spin contributions call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab) if(Ne(2).ge.3)then + ! same spin double excitations with same spin contributions call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa) else hthree_aaa = 0.d0 @@ -246,6 +254,9 @@ END_PROVIDER subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree) + BEGIN_DOC +! pure same spin contribution to same spin double excitation s1=h1,p1, s2=h2,p2, with s1==s2 + END_DOC use bitmasks ! you need to include the bitmasks_module.f90 features implicit none diff --git a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f b/src/tc_bi_ortho/print_tc_energy.irp.f similarity index 72% rename from src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f rename to src/tc_bi_ortho/print_tc_energy.irp.f index 5eb3c069..980d12de 100644 --- a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f +++ b/src/tc_bi_ortho/print_tc_energy.irp.f @@ -1,4 +1,4 @@ -program tc_bi_ortho +program print_tc_energy implicit none BEGIN_DOC ! TODO : Put the documentation of the program here @@ -9,7 +9,11 @@ program tc_bi_ortho my_n_pt_a_grid = 50 read_wf = .True. touch read_wf + + PROVIDE j1b_type + print*, 'j1b_type = ', j1b_type + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine_save_left_right_bi_ortho -! call test + call write_tc_energy end + diff --git a/src/tc_bi_ortho/print_tc_var.irp.f b/src/tc_bi_ortho/print_tc_var.irp.f new file mode 100644 index 00000000..fa0a4363 --- /dev/null +++ b/src/tc_bi_ortho/print_tc_var.irp.f @@ -0,0 +1,20 @@ +program print_tc_var + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + print *, 'Hello world' + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. 
+ touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call write_tc_var() + +end + diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f index 58a733a7..0c4198a9 100644 --- a/src/tc_bi_ortho/print_tc_wf.irp.f +++ b/src/tc_bi_ortho/print_tc_wf.irp.f @@ -26,7 +26,8 @@ subroutine write_l_r_wf integer :: i print*,'Writing the left-right wf' do i = 1, N_det - write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i) + write(i_unit_output,*)i, psi_l_coef_sorted_bi_ortho_left(i)/psi_l_coef_sorted_bi_ortho_left(1) & + , psi_r_coef_sorted_bi_ortho_right(i)/psi_r_coef_sorted_bi_ortho_right(1) enddo @@ -48,12 +49,12 @@ subroutine routine do i = 1, N_det call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. degree == 2)then - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0) delta_e = e_tilde_00 - e_i0 coef_pt1 = htilde_ij / delta_e - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij) contrib_pt = coef_pt1 * htilde_ij e_pt2 += contrib_pt diff --git a/src/tc_bi_ortho/psi_left_qmc.irp.f b/src/tc_bi_ortho/psi_left_qmc.irp.f index 25048f82..4e3b8e86 100644 --- a/src/tc_bi_ortho/psi_left_qmc.irp.f +++ b/src/tc_bi_ortho/psi_left_qmc.irp.f @@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det, implicit none integer :: k, l + !print *, ' providing psi_bitcleft_bilinear_matrix_values' + if(N_det .eq. 
1) then do l = 1, N_states @@ -38,6 +40,8 @@ BEGIN_PROVIDER [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det, endif + !print *, ' psi_bitcleft_bilinear_matrix_values OK' + END_PROVIDER ! --- diff --git a/src/tc_bi_ortho/psi_r_l_prov.irp.f b/src/tc_bi_ortho/psi_r_l_prov.irp.f index 521acff5..b28c417f 100644 --- a/src/tc_bi_ortho/psi_r_l_prov.irp.f +++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f @@ -136,7 +136,7 @@ BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) END_PROVIDER -subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef) +subroutine save_tc_wavefunction_general(ndet, nstates, psidet, sze, dim_psicoef, psilcoef, psircoef) implicit none BEGIN_DOC ! Save the wave function into the |EZFIO| file @@ -192,37 +192,78 @@ subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psil endif end -subroutine save_tc_bi_ortho_wavefunction - implicit none - if(save_sorted_tc_wf)then - call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho) - else - call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho) - endif - call routine_save_right_bi_ortho +! 
--- + +subroutine save_tc_bi_ortho_wavefunction() + + implicit none + + if(save_sorted_tc_wf) then + + call save_tc_wavefunction_general( N_det, N_states, psi_det_sorted_tc, size(psi_det_sorted_tc, 3) & + , size(psi_l_coef_sorted_bi_ortho, 1), psi_l_coef_sorted_bi_ortho, psi_r_coef_sorted_bi_ortho) + call routine_save_right_sorted_bi_ortho() + + else + + call save_tc_wavefunction_general( N_det, N_states, psi_det, size(psi_det, 3) & + , size(psi_l_coef_bi_ortho, 1), psi_l_coef_bi_ortho, psi_r_coef_bi_ortho ) + call routine_save_right_bi_ortho() + + endif + end -subroutine routine_save_right_bi_ortho - implicit none - double precision, allocatable :: coef_tmp(:,:) - integer :: i - allocate(coef_tmp(N_det, N_states)) - do i = 1, N_det - coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) - enddo - call save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1)) -end +! --- + +subroutine routine_save_right_sorted_bi_ortho() + + implicit none + integer :: i + double precision, allocatable :: coef_tmp(:,:) + + allocate(coef_tmp(N_det, N_states)) + do i = 1, N_det + coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states) + enddo + call save_wavefunction_general_unormalized(N_det, N_states, psi_det_sorted_tc, size(coef_tmp, 1), coef_tmp(1,1)) + deallocate(coef_tmp) -subroutine routine_save_left_right_bi_ortho - implicit none - double precision, allocatable :: coef_tmp(:,:) - integer :: i,n_states_tmp - n_states_tmp = 2 - allocate(coef_tmp(N_det, n_states_tmp)) - do i = 1, N_det - coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1) - coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1) - enddo - call save_wavefunction_general_unormalized(N_det,n_states_tmp,psi_det,size(coef_tmp,1),coef_tmp(1,1)) end +subroutine routine_save_left_right_sorted_bi_ortho() + + implicit none + integer :: i, n_states_tmp + double precision, allocatable :: coef_tmp(:,:) + + n_states_tmp = 2 + allocate(coef_tmp(N_det, n_states_tmp)) + do i = 1, N_det + 
coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1) + coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1) + enddo + call save_wavefunction_general_unormalized(N_det, n_states_tmp, psi_det, size(coef_tmp, 1), coef_tmp(1,1)) + deallocate(coef_tmp) +end + +! --- + +subroutine routine_save_right_bi_ortho() + + implicit none + integer :: i + double precision, allocatable :: coef_tmp(:,:) + + allocate(coef_tmp(N_det, N_states)) + do i = 1, N_det + coef_tmp(i,1:N_states) = psi_r_coef_bi_ortho(i,1:N_states) + enddo + call save_wavefunction_general_unormalized(N_det, N_states, psi_det, size(coef_tmp, 1), coef_tmp(1,1)) + deallocate(coef_tmp) + +end + +! --- + + diff --git a/src/tc_bi_ortho/pt2_tc_cisd.irp.f b/src/tc_bi_ortho/pt2_tc_cisd.irp.f new file mode 100644 index 00000000..9cb9a600 --- /dev/null +++ b/src/tc_bi_ortho/pt2_tc_cisd.irp.f @@ -0,0 +1,125 @@ +program pt2_tc_cisd + + BEGIN_DOC + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! + END_DOC + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 + read_wf = .True. 
+ touch read_wf + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + call routine_diag() + + call routine +end + +subroutine routine + implicit none + integer :: i,h1,p1,h2,p2,s1,s2,degree + double precision :: h0i,hi0,e00,ei,delta_e + double precision :: norm,e_corr,coef,e_corr_pos,e_corr_neg,e_corr_abs + + integer :: exc(0:2,2,2) + double precision :: phase + double precision :: eh1,ep1,eh2,ep2 + + norm = 0.d0 + e_corr = 0.d0 + e_corr_abs = 0.d0 + e_corr_pos = 0.d0 + e_corr_neg = 0.d0 + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,1), N_int, e00) + do i = 2, N_det + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,1), N_int, hi0) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,1), psi_det(1,1,i), N_int, h0i) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, ei) + call get_excitation_degree(psi_det(1,1,1), psi_det(1,1,i),degree,N_int) + call get_excitation(psi_det(1,1,1), psi_det(1,1,i),exc,degree,phase,N_int) + call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2) + eh1 = Fock_matrix_tc_diag_mo_tot(h1) + ep1 = Fock_matrix_tc_diag_mo_tot(p1) + delta_e = eh1 - ep1 + if (degree==2)then + eh2 = Fock_matrix_tc_diag_mo_tot(h2) + ep2 = Fock_matrix_tc_diag_mo_tot(p2) + delta_e += eh2 - ep2 + endif +! delta_e = e00 - ei + coef = hi0/delta_e + norm += coef*coef + e_corr = coef* h0i + if(e_corr.lt.0.d0)then + e_corr_neg += e_corr + elseif(e_corr.gt.0.d0)then + e_corr_pos += e_corr + endif + e_corr_abs += dabs(e_corr) + enddo + print*,'e_corr_abs = ',e_corr_abs + print*,'e_corr_pos = ',e_corr_pos + print*,'e_corr_neg = ',e_corr_neg + print*,'norm = ',dsqrt(norm) + +end + +subroutine routine_diag() + + implicit none + integer :: i, j, k + double precision :: dE + + ! provide eigval_right_tc_bi_orth + ! provide overlap_bi_ortho + ! provide htilde_matrix_elmt_bi_ortho + + if(N_states .eq. 
1) then + + print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) + print*,'e_tc_left_right = ',e_tc_left_right + print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 + print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth + print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single + print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double + print*,'***' + print*,'e_corr_bi_orth = ',e_corr_bi_orth + print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj + print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs + print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth + print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs + print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs + print*,'Left/right eigenvectors' + do i = 1,N_det + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + enddo + + else + + print*,'eigval_right_tc_bi_orth : ' + do i = 1, N_states + print*, i, eigval_right_tc_bi_orth(i) + enddo + + print*,'' + print*,'******************************************************' + print*,'TC Excitation energies (au) (eV)' + do i = 2, N_states + dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) + print*, i, dE, dE/0.0367502d0 + enddo + print*,'' + + endif + +end + + + diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f similarity index 83% rename from src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet rename to src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f index eb812401..efa4aa2c 100644 --- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet +++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f @@ -1,5 +1,18 @@ program save_bitcpsileft_for_qmcchem + implicit none + + read_wf = .True. 
+ TOUCH read_wf + + call main() + +end + + +subroutine main() + + implicit none integer :: iunit logical :: exists double precision :: e_ref @@ -46,7 +59,7 @@ program save_bitcpsileft_for_qmcchem close(iunit) -end +end subroutine main ! -- @@ -61,12 +74,18 @@ subroutine write_lr_spindeterminants() PROVIDE psi_bitcleft_bilinear_matrix_values + print *, ' saving left determinants' + print *, ' assuming save_for_qmc called before to save right determinants' + print *, ' N_det = ', N_det + print *, ' N_states = ', N_states + allocate(buffer(N_det,N_states)) do l = 1, N_states do k = 1, N_det buffer(k,l) = psi_bitcleft_bilinear_matrix_values(k,l) enddo enddo + call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer) deallocate(buffer) diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f similarity index 88% rename from src/tc_bi_ortho/slater_tc_3e.irp.f rename to src/tc_bi_ortho/slater_tc_3e_slow.irp.f index 7b73d5f2..6abb6b78 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e_slow.irp.f @@ -1,23 +1,5 @@ -subroutine provide_all_three_ints_bi_ortho - implicit none - BEGIN_DOC -! 
routine that provides all necessary three-electron integrals - END_DOC - if(three_body_h_tc)then - PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort - PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort - PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort - PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort - endif -if(.not.double_normal_ord)then - PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort - PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort -else - PROVIDE normal_two_body_bi_orth -endif -end -subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) +subroutine diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) BEGIN_DOC ! diagonal element of htilde ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -108,7 +90,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) end -subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS @@ -203,7 +185,7 @@ end ! --- -subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) +subroutine double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) BEGIN_DOC ! for double excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f index a19d4688..3fd2576a 100644 --- a/src/tc_bi_ortho/slater_tc_opt.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt.irp.f @@ -1,3 +1,26 @@ +subroutine provide_all_three_ints_bi_ortho + implicit none + BEGIN_DOC +! 
routine that provides all necessary three-electron integrals + END_DOC + if(three_body_h_tc)then + if(three_e_3_idx_term)then + PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort + PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort + endif + if(three_e_4_idx_term)then + PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort + PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort + endif + if(.not.double_normal_ord.and.three_e_5_idx_term)then + PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort + PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort + elseif (double_normal_ord .and. (.not. three_e_5_idx_term))then + PROVIDE normal_two_body_bi_orth + endif + endif +end + subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot) implicit none BEGIN_DOC diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f index 00cebf3a..531f0141 100644 --- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f @@ -7,11 +7,11 @@ ! 
Various component of the TC energy for the reference "HF" Slater determinant END_DOC double precision :: hmono, htwoe, htot, hthree - call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(N_int,HF_bitmask , hmono, htwoe, htot) ref_tc_energy_1e = hmono ref_tc_energy_2e = htwoe if(three_body_h_tc)then - call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree) + call diag_htilde_three_body_ints_bi_ort_slow(N_int, HF_bitmask, hthree) ref_tc_energy_3e = hthree else ref_tc_energy_3e = 0.d0 @@ -156,7 +156,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na @@ -243,7 +243,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb) htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb) enddo - if(three_body_h_tc)then + if(three_body_h_tc.and.elec_num.gt.2.and.three_e_3_idx_term)then !!!!! 3-e part !! same-spin/same-spin do j = 1, na diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f index baca498c..2d6bfb27 100644 --- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f @@ -41,15 +41,15 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, if(s1.ne.s2)then ! 
opposite spin two-body htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(three_body_h_tc)then - if(.not.double_normal_ord)then + if(three_body_h_tc.and.elec_num.gt.2)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then - htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? + elseif(double_normal_ord)then + htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) endif endif else @@ -58,16 +58,16 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(three_body_h_tc)then - if(.not.double_normal_ord)then + if(three_body_h_tc.and.elec_num.gt.2)then + if(.not.double_normal_ord.and.three_e_5_idx_term)then if(degree_i>degree_j)then call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree) else call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree) endif - elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then - htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? - htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? 
+ elseif(double_normal_ord)then + htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) + htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) endif endif endif diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f index 7cff3c73..7178d6d9 100644 --- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f +++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f @@ -106,7 +106,7 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h htwoe -= buffer_x(i) enddo hthree = 0.d0 - if (three_body_h_tc)then + if (three_body_h_tc.and.elec_num.gt.2.and.three_e_4_idx_term)then call three_comp_fock_elem(key_i,h,p,spin,hthree) endif diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc_slow.irp.f similarity index 85% rename from src/tc_bi_ortho/slater_tc.irp.f rename to src/tc_bi_ortho/slater_tc_slow.irp.f index 2c0ae2ca..1833d20f 100644 --- a/src/tc_bi_ortho/slater_tc.irp.f +++ b/src/tc_bi_ortho/slater_tc_slow.irp.f @@ -1,7 +1,7 @@ ! --- -subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) +subroutine htilde_mu_mat_bi_ortho_tot_slow(key_j, key_i, Nint, htot) BEGIN_DOC ! where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis @@ -24,14 +24,14 @@ subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot) if(degree.gt.2)then htot = 0.d0 else - call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) endif -end subroutine htilde_mu_mat_bi_ortho_tot +end subroutine htilde_mu_mat_bi_ortho_tot_slow ! -- -subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot) +subroutine htilde_mu_mat_bi_ortho_slow(key_j, key_i, Nint, hmono, htwoe, hthree, htot) BEGIN_DOC ! 
@@ -61,22 +61,22 @@ subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot if(degree.gt.2) return if(degree == 0)then - call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) + call diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) else if (degree == 1)then - call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) else if(degree == 2)then - call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) + call double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) endif if(three_body_h_tc) then if(degree == 2) then - if(.not.double_normal_ord) then - call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) + if(.not.double_normal_ord.and.elec_num.gt.2.and.three_e_5_idx_term) then + call double_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) endif - else if(degree == 1) then - call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree) - else if(degree == 0) then - call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) + else if(degree == 1.and.elec_num.gt.2.and.three_e_4_idx_term) then + call single_htilde_three_body_ints_bi_ort_slow(Nint, key_j, key_i, hthree) + else if(degree == 0.and.elec_num.gt.2.and.three_e_3_idx_term) then + call diag_htilde_three_body_ints_bi_ort_slow(Nint, key_i, hthree) endif endif @@ -89,7 +89,7 @@ end ! --- -subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot) +subroutine diag_htilde_mu_mat_bi_ortho_slow(Nint, key_i, hmono, htwoe, htot) BEGIN_DOC ! diagonal element of htilde ONLY FOR ONE- AND TWO-BODY TERMS @@ -188,7 +188,7 @@ end -subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine double_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! 
for double excitation ONLY FOR ONE- AND TWO-BODY TERMS @@ -227,18 +227,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) return endif -! if(core_tc_op)then -! print*,'core_tc_op not already taken into account for bi ortho' -! print*,'stopping ...' -! stop -! do i = 1, Nint -! key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1)) -! key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2)) -! enddo -! call bitstring_to_list_ab(key_i_core, occ, Ne, Nint) -! else call bitstring_to_list_ab(key_i, occ, Ne, Nint) -! endif call get_double_excitation(key_i, key_j, exc, phase, Nint) call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2) @@ -246,7 +235,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) ! opposite spin two-body ! key_j, key_i htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ??? endif else @@ -255,7 +244,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) htwoe = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) ! exchange terms htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) - if(double_normal_ord.and.+Ne(1).gt.2)then + if(three_body_h_tc.and.double_normal_ord.and.+Ne(1).gt.2)then htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ??? htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ??? endif @@ -266,7 +255,7 @@ subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) end -subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot) +subroutine single_htilde_mu_mat_bi_ortho_slow(Nint, key_j, key_i, hmono, htwoe, htot) BEGIN_DOC ! 
for single excitation ONLY FOR ONE- AND TWO-BODY TERMS diff --git a/src/tc_bi_ortho/tc_bi_ortho.irp.f b/src/tc_bi_ortho/tc_bi_ortho.irp.f index bd0b1ef5..f69684c0 100644 --- a/src/tc_bi_ortho/tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f @@ -1,16 +1,25 @@ program tc_bi_ortho - implicit none + BEGIN_DOC -! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. + ! + ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together + ! with the energy. Saves the left-right wave functions at the end. + ! END_DOC + my_grid_becke = .True. my_n_pt_r_grid = 30 my_n_pt_a_grid = 50 read_wf = .True. touch read_wf - touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid - call routine_diag - call save_tc_bi_ortho_wavefunction + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + print*, ' nb of states = ', N_states + print*, ' nb of det = ', N_det + + call routine_diag() + call write_tc_energy() + call save_tc_bi_ortho_wavefunction() end subroutine test @@ -27,26 +36,56 @@ subroutine test end -subroutine routine_diag - implicit none -! provide eigval_right_tc_bi_orth -! provide overlap_bi_ortho -! 
provide htilde_matrix_elmt_bi_ortho - integer ::i,j - print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) - print*,'e_tc_left_right = ',e_tc_left_right - print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 - print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth - print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single - print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double - print*,'***' - print*,'e_corr_bi_orth = ',e_corr_bi_orth - print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj - print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth - print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth - print*,'Left/right eigenvectors' - do i = 1,N_det - write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) - enddo +subroutine routine_diag() + + implicit none + integer :: i, j, k + double precision :: dE + + ! provide eigval_right_tc_bi_orth + ! provide overlap_bi_ortho + ! provide htilde_matrix_elmt_bi_ortho + + if(N_states .eq. 
1) then + + print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1) + print*,'e_tc_left_right = ',e_tc_left_right + print*,'e_tilde_bi_orth_00 = ',e_tilde_bi_orth_00 + print*,'e_pt2_tc_bi_orth = ',e_pt2_tc_bi_orth + print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single + print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double + print*,'***' + print*,'e_corr_bi_orth = ',e_corr_bi_orth + print*,'e_corr_bi_orth_proj = ',e_corr_bi_orth_proj + print*,'e_corr_bi_orth_proj_abs = ',e_corr_bi_orth_proj_abs + print*,'e_corr_single_bi_orth = ',e_corr_single_bi_orth + print*,'e_corr_double_bi_orth = ',e_corr_double_bi_orth + print*,'e_corr_single_bi_orth_abs = ',e_corr_single_bi_orth_abs + print*,'e_corr_double_bi_orth_abs = ',e_corr_double_bi_orth_abs + print*,'Left/right eigenvectors' + do i = 1,N_det + write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1) + enddo + + else + + print*,'eigval_right_tc_bi_orth : ' + do i = 1, N_states + print*, i, eigval_right_tc_bi_orth(i) + enddo + + print*,'' + print*,'******************************************************' + print*,'TC Excitation energies (au) (eV)' + do i = 2, N_states + dE = eigval_right_tc_bi_orth(i) - eigval_right_tc_bi_orth(1) + print*, i, dE, dE/0.0367502d0 + enddo + print*,'' + + endif + end + + diff --git a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f index 4ae44148..4c3c0788 100644 --- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f +++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f @@ -11,10 +11,10 @@ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states)) dressing_dets = 0.d0 do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int) if(degree == 1 .or. 
degree == 2)then - call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) + call htilde_mu_mat_bi_ortho_slow(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i)) endif enddo reigvec_tc_bi_orth_tmp = 0.d0 @@ -29,7 +29,7 @@ vec_tmp(istate,istate) = 1.d0 enddo print*,'Diagonalizing the TC CISD ' - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) do i = 1, N_det e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1) enddo @@ -41,8 +41,8 @@ it = 0 dressing_dets = 0.d0 double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav + external htc_bi_ortho_calc_tdav_slow + external htcdag_bi_ortho_calc_tdav_slow logical :: converged do while (dabs(E_before-E_current).gt.thr) it += 1 @@ -66,7 +66,7 @@ do istate = N_states+1, n_states_diag vec_tmp(istate,istate) = 1.d0 enddo - call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) + call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav_slow) print*,'outside Davidson' print*,'eigval_tmp(1) = ',eigval_tmp(1) do i = 1, N_det diff --git a/src/tc_bi_ortho/tc_h_eigvectors.irp.f b/src/tc_bi_ortho/tc_h_eigvectors.irp.f index 69302da2..fa946d6a 100644 --- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f +++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f @@ -43,7 +43,7 @@ end END_DOC implicit none - integer :: i, idx_dress, j, istate + integer :: i, idx_dress, j, istate, k logical :: converged, dagger 
integer :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:) @@ -52,116 +52,123 @@ end integer :: i_good_state,i_other_state, i_state integer, allocatable :: index_good_state_array(:) logical, allocatable :: good_state_array(:) - double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + double precision, allocatable :: coef_hf_r(:),coef_hf_l(:) + double precision, allocatable :: Stmp(:,:) integer, allocatable :: iorder(:) PROVIDE N_det N_int - if(n_det.le.N_det_max_full)then + if(n_det .le. N_det_max_full) then + allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det)) allocate (H_prime(N_det,N_det),s2_values_tmp(N_det)) + H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det) - if(s2_eig)then - H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) - do j=1,N_det - H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 - enddo + if(s2_eig) then + H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det) + do j=1,N_det + H_prime(j,j) = H_prime(j,j) - alpha*expected_s2 + enddo endif - call non_hrmt_real_diag(N_det,H_prime,& - leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& - n_real_tc_bi_orth_eigval_right,eigval_right_tmp) + + call non_hrmt_real_diag(N_det, H_prime, leigvec_tc_bi_orth_tmp, reigvec_tc_bi_orth_tmp, n_real_tc_bi_orth_eigval_right, eigval_right_tmp) ! do i = 1, N_det ! call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i)) ! enddo call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp) + allocate(index_good_state_array(N_det),good_state_array(N_det)) i_state = 0 good_state_array = .False. - if(s2_eig)then - if (only_expected_s2) then - do j=1,N_det + + if(s2_eig) then + + if(only_expected_s2) then + do j = 1, N_det ! 
Select at least n_states states with S^2 values closed to "expected_s2" ! print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j) - if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then - i_state +=1 - index_good_state_array(i_state) = j - good_state_array(j) = .True. - endif - if(i_state.eq.N_states) then - exit - endif - enddo - else - do j=1,N_det - index_good_state_array(j) = j - good_state_array(j) = .True. - enddo - endif - if(i_state .ne.0)then - ! Fill the first "i_state" states that have a correct S^2 value - do j = 1, i_state - do i=1,N_det - reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) - leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) - enddo - eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) - eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) - s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) - enddo - i_other_state = 0 - do j = 1, N_det - if(good_state_array(j))cycle - i_other_state +=1 - if(i_state+i_other_state.gt.n_states)then - exit - endif - do i=1,N_det - reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) - leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) - enddo - eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) - eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) - s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) - enddo - else ! istate == 0 - print*,'' - print*,'!!!!!!!! WARNING !!!!!!!!!' - print*,' Within the ',N_det,'determinants selected' - print*,' and the ',N_states_diag,'states requested' - print*,' We did not find only states with S^2 values close to ',expected_s2 - print*,' We will then set the first N_states eigenvectors of the H matrix' - print*,' as the CI_eigenvectors' - print*,' You should consider more states and maybe ask for s2_eig to be .True. 
or just enlarge the CI space' - print*,'' - do j=1,min(N_states_diag,N_det) - do i=1,N_det - leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) - reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) - enddo - eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) - eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) - s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) - enddo - endif ! istate .ne. 0 + if(dabs(s2_values_tmp(j) - expected_s2).le.0.5d0)then + i_state +=1 + index_good_state_array(i_state) = j + good_state_array(j) = .True. + endif + if(i_state.eq.N_states) then + exit + endif + enddo + else + do j = 1, N_det + index_good_state_array(j) = j + good_state_array(j) = .True. + enddo + endif + + if(i_state .ne. 0) then + ! Fill the first "i_state" states that have a correct S^2 value + do j = 1, i_state + do i = 1, N_det + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j)) + enddo + eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + eigval_left_tc_bi_orth(j) = expect_e(index_good_state_array(j)) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(index_good_state_array(j)) + enddo + i_other_state = 0 + do j = 1, N_det + if(good_state_array(j))cycle + i_other_state +=1 + if(i_state+i_other_state.gt.n_states)then + exit + endif + do i = 1, N_det + reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j) + leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(i_state+i_other_state) = s2_values_tmp(i_state+i_other_state) + enddo + else ! istate == 0 + print*,'' + print*,'!!!!!!!! WARNING !!!!!!!!!' 
+ print*,' Within the ',N_det,'determinants selected' + print*,' and the ',N_states_diag,'states requested' + print*,' We did not find only states with S^2 values close to ',expected_s2 + print*,' We will then set the first N_states eigenvectors of the H matrix' + print*,' as the CI_eigenvectors' + print*,' You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space' + print*,'' + do j = 1, min(N_states_diag, N_det) + do i = 1, N_det + leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j) + reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j) + enddo + eigval_right_tc_bi_orth(j) = eigval_right_tmp(j) + eigval_left_tc_bi_orth (j) = eigval_right_tmp(j) + s2_eigvec_tc_bi_orth(j) = s2_values_tmp(j) + enddo + endif ! istate .ne. 0 else ! s2_eig - allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) - do i = 1,N_det + + allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det)) + do i = 1,N_det iorder(i) = i coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_r,iorder,N_det) - igood_r = iorder(1) - print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) - do i = 1,N_det + enddo + call dsort(coef_hf_r,iorder,N_det) + igood_r = iorder(1) + print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1) + do i = 1,N_det iorder(i) = i coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i)) - enddo - call dsort(coef_hf_l,iorder,N_det) - igood_l = iorder(1) - print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) + enddo + call dsort(coef_hf_l,iorder,N_det) + igood_l = iorder(1) + print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1) - if(igood_r.ne.igood_l.and.igood_r.ne.1)then + if(igood_r.ne.igood_l .and. igood_r.ne.1) then print *,'' print *,'Warning, the left and right eigenvectors are "not the same" ' print *,'Warning, the ground state is not dominated by HF...' 
@@ -169,22 +176,22 @@ end print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r) print *,'State with largest LEFT coefficient of HF ',igood_l print *,'coef of HF in LEFT eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l) - endif - if(state_following_tc)then + endif + + if(state_following_tc) then print *,'Following the states with the largest coef on HF' print *,'igood_r,igood_l',igood_r,igood_l - i= igood_r + i = igood_r eigval_right_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i) -! print*,reigvec_tc_bi_orth(j,1) enddo - i= igood_l + i = igood_l eigval_left_tc_bi_orth(1) = eigval_right_tmp(i) do j = 1, N_det leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i) enddo - else + else do i = 1, N_states eigval_right_tc_bi_orth(i) = eigval_right_tmp(i) eigval_left_tc_bi_orth(i) = eigval_right_tmp(i) @@ -193,46 +200,48 @@ end leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i) enddo enddo - endif + endif + endif - else + + else ! n_det > N_det_max_full + double precision, allocatable :: H_jj(:),vec_tmp(:,:) - external htc_bi_ortho_calc_tdav - external htcdag_bi_ortho_calc_tdav external H_tc_u_0_opt external H_tc_dagger_u_0_opt external H_tc_s2_dagger_u_0_opt external H_tc_s2_u_0_opt + allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag)) + do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo - !!!! Preparing the left-eigenvector + print*,'---------------------------------' print*,'---------------------------------' print*,'Computing the left-eigenvector ' print*,'---------------------------------' print*,'---------------------------------' + !!!! 
Preparing the left-eigenvector vec_tmp = 0.d0 do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) + vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate) enddo do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 + vec_tmp(istate,istate) = 1.d0 enddo -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav) -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt) integer :: n_it_max,i_it n_it_max = 1 converged = .False. i_it = 0 do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - i_it += 1 - if(i_it .gt. 5)exit + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) + i_it += 1 + if(i_it .gt. 5) exit enddo do istate = 1, N_states - leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo print*,'---------------------------------' @@ -240,78 +249,124 @@ end print*,'Computing the right-eigenvector ' print*,'---------------------------------' print*,'---------------------------------' - !!!! Preparing the right-eigenvector + !!!! Preparing the right-eigenvector vec_tmp = 0.d0 do istate = 1, N_states - vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) + vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate) enddo do istate = N_states+1, n_states_diag - vec_tmp(istate,istate) = 1.d0 + vec_tmp(istate,istate) = 1.d0 enddo -! 
call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav) -! call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) + !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt) converged = .False. i_it = 0 - do while (.not.converged) - call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt) - i_it += 1 - if(i_it .gt. 5)exit + do while (.not. converged) + call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt) + i_it += 1 + if(i_it .gt. 5) exit enddo do istate = 1, N_states - reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) + reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate) enddo deallocate(H_jj) - endif - call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states) - print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) - norm_ground_left_right_bi_orth = 0.d0 - do j = 1, N_det - norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1) - enddo - print*,'norm l/r = ',norm_ground_left_right_bi_orth - print*,' = ',s2_eigvec_tc_bi_orth(1) + endif + + call bi_normalize(leigvec_tc_bi_orth, reigvec_tc_bi_orth, size(reigvec_tc_bi_orth, 1), N_det, N_states) + ! 
check bi-orthogonality + allocate(Stmp(N_states,N_states)) + call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0 & + , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) & + , 0.d0, Stmp(1,1), size(Stmp, 1) ) + print *, ' overlap matrix between states:' + do i = 1, N_states + write(*,'(1000(F16.10,X))') Stmp(i,:) + enddo + deallocate(Stmp) + + print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ', leigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(1,1) + do i = 1, N_states + norm_ground_left_right_bi_orth = 0.d0 + do j = 1, N_det + norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,i) * reigvec_tc_bi_orth(j,i) + enddo + print*,' state ', i + print*,' norm l/r = ', norm_ground_left_right_bi_orth + print*,' = ', s2_eigvec_tc_bi_orth(i) + enddo + + double precision, allocatable :: buffer(:,:) + allocate(buffer(N_det,N_states)) + do k = 1, N_states + do i = 1, N_det + psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k) + buffer(i,k) = leigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_l_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer) + do k = 1, N_states + do i = 1, N_det + psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k) + buffer(i,k) = reigvec_tc_bi_orth(i,k) + enddo + enddo + TOUCH psi_r_coef_bi_ortho + call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer) + deallocate(buffer) END_PROVIDER -subroutine bi_normalize(u_l,u_r,n,ld,nstates) +subroutine bi_normalize(u_l, u_r, n, ld, nstates) + + BEGIN_DOC !!!! Normalization of the scalar product of the left/right eigenvectors + END_DOC + + implicit none + integer, intent(in) :: n, ld, nstates double precision, intent(inout) :: u_l(ld,nstates), u_r(ld,nstates) - integer, intent(in) :: n,ld,nstates - integer :: i - double precision :: accu, tmp + integer :: i, j + double precision :: accu, tmp + do i = 1, nstates - !!!! 
Normalization of right eigenvectors |Phi> - accu = 0.d0 - do j = 1, n - accu += u_r(j,i) * u_r(j,i) - enddo - accu = 1.d0/dsqrt(accu) - print*,'accu_r = ',accu - do j = 1, n - u_r(j,i) *= accu - enddo - tmp = u_r(1,i) / dabs(u_r(1,i)) - do j = 1, n - u_r(j,i) *= tmp - enddo - !!!! Adaptation of the norm of the left eigenvector such that = 1 - accu = 0.d0 - do j = 1, n - accu += u_l(j,i) * u_r(j,i) -! print*,j, u_l(j,i) , u_r(j,i) - enddo - if(accu.gt.0.d0)then + + !!!! Normalization of right eigenvectors |Phi> + accu = 0.d0 + do j = 1, n + accu += u_r(j,i) * u_r(j,i) + enddo accu = 1.d0/dsqrt(accu) - else - accu = 1.d0/dsqrt(-accu) - endif - tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i)) - do j = 1, n - u_l(j,i) *= accu * tmp - u_r(j,i) *= accu - enddo + print*,'accu_r = ',accu + do j = 1, n + u_r(j,i) *= accu + enddo + tmp = u_r(1,i) / dabs(u_r(1,i)) + do j = 1, n + u_r(j,i) *= tmp + enddo + + !!!! Adaptation of the norm of the left eigenvector such that = 1 + accu = 0.d0 + do j = 1, n + accu += u_l(j,i) * u_r(j,i) + !print*,j, u_l(j,i) , u_r(j,i) + enddo + print*,'accu_lr = ', accu + if(accu.gt.0.d0)then + accu = 1.d0/dsqrt(accu) + else + accu = 1.d0/dsqrt(-accu) + endif + tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i)) + do j = 1, n + u_l(j,i) *= accu * tmp + u_r(j,i) *= accu + enddo + enddo + end + diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index 44e27e7c..ec072531 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -9,28 +9,25 @@ implicit none integer :: i, j - double precision :: hmono,htwoe,hthree,htot + double precision :: htot PROVIDE N_int - !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) & + + i = 1 + j = 1 + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) + + !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j, htot) & !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho) do i = 1, 
N_det do j = 1, N_det ! < J | Htilde | I > - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_opt_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot) - !print *, ' hmono = ', hmono - !print *, ' htwoe = ', htwoe - !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo !$OMP END PARALLEL DO -! print*,'htilde_matrix_elmt_bi_ortho = ' -! do i = 1, min(100,N_det) -! write(*,'(100(F16.10,X))')htilde_matrix_elmt_bi_ortho(1:min(100,N_det),i) -! enddo - END_PROVIDER diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f index 291c52ef..a7e4d09e 100644 --- a/src/tc_bi_ortho/tc_som.irp.f +++ b/src/tc_bi_ortho/tc_som.irp.f @@ -56,8 +56,8 @@ subroutine main() U_SOM = 0.d0 do i = 1, N_det if(i == i_HF) cycle - call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) - call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2) U_SOM += htot_1 * htot_2 enddo U_SOM = 0.5d0 * U_SOM diff --git a/src/tc_bi_ortho/tc_utils.irp.f b/src/tc_bi_ortho/tc_utils.irp.f new file mode 100644 index 00000000..24bb7017 --- /dev/null +++ b/src/tc_bi_ortho/tc_utils.irp.f @@ -0,0 +1,60 @@ + +subroutine write_tc_energy() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: E_TC, O_TC + + do k = 1, n_states + + E_TC = 0.d0 + do i = 1, N_det + do j = 1, N_det + !htot = htilde_matrix_elmt_bi_ortho(i,j) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + E_TC = E_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot + !E_TC = E_TC + leigvec_tc_bi_orth(i,k) 
* reigvec_tc_bi_orth(j,k) * htot + enddo + enddo + + O_TC = 0.d0 + do i = 1, N_det + !O_TC = O_TC + leigvec_tc_bi_orth(i,k) * reigvec_tc_bi_orth(i,k) + O_TC = O_TC + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k) + enddo + + print *, ' state :', k + print *, " E_TC = ", E_TC / O_TC + print *, " O_TC = ", O_TC + + enddo + +end + +! --- + +subroutine write_tc_var() + + implicit none + integer :: i, j, k + double precision :: hmono, htwoe, hthree, htot + double precision :: SIGMA_TC + + do k = 1, n_states + + SIGMA_TC = 0.d0 + do j = 2, N_det + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,1), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot) + SIGMA_TC = SIGMA_TC + htot * htot + enddo + + print *, " state : ", k + print *, " SIGMA_TC = ", SIGMA_TC + + enddo + +end + +! --- + diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f index 118e481a..cb0c355c 100644 --- a/src/tc_bi_ortho/test_normal_order.irp.f +++ b/src/tc_bi_ortho/test_normal_order.irp.f @@ -35,7 +35,7 @@ subroutine test det_i = ref_bitmask call do_single_excitation(det_i,h1,p1,s1,i_ok) call do_single_excitation(det_i,h2,p2,s2,i_ok) - call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) hthree *= phase @@ -67,7 +67,7 @@ do h1 = 1, elec_alpha_num if(i_ok.ne.1)cycle call do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) integer :: hh1, pp1, hh2, pp2, ss1, ss2 @@ -103,7 +103,7 @@ do h1 = 1, elec_beta_num if(i_ok.ne.1)cycle call 
do_single_excitation(det_i,h2,p2,s2,i_ok) if(i_ok.ne.1)cycle - call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij) call get_excitation_degree(ref_bitmask,det_i,degree,N_int) call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int) call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2) diff --git a/src/tc_bi_ortho/test_s2_tc.irp.f b/src/tc_bi_ortho/test_s2_tc.irp.f index 4debe2e2..1f7bdfda 100644 --- a/src/tc_bi_ortho/test_s2_tc.irp.f +++ b/src/tc_bi_ortho/test_s2_tc.irp.f @@ -91,7 +91,7 @@ subroutine routine_test_s2_davidson external H_tc_s2_u_0_opt allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),energies(n_states_diag), s2(n_states_diag)) do i = 1, N_det - call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) + call htilde_mu_mat_bi_ortho_tot_slow(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i)) enddo ! Preparing the left-eigenvector print*,'Computing the left-eigenvector ' diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f index 6721c285..df86ea65 100644 --- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f +++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f @@ -31,7 +31,7 @@ subroutine test_h_u0 u_0(i) = psi_r_coef_bi_ortho(i,1) enddo call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det) + call htc_bi_ortho_calc_tdav_slow (v_0_ref,u_0,N_states,N_det) print*,'difference right ' accu = 0.d0 do i = 1, N_det @@ -42,7 +42,7 @@ subroutine test_h_u0 do_right = .False. 
v_0_new = 0.d0 call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right) - call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right) + call htcdag_bi_ortho_calc_tdav_slow(v_0_ref_dagger,u_0,N_states,N_det, do_right) print*,'difference left' accu = 0.d0 do i = 1, N_det @@ -63,7 +63,7 @@ subroutine test_slater_tc_opt i_count = 0.d0 do i = 1, N_det do j = 1,N_det - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot) if(dabs(htot).gt.1.d-15)then i_count += 1.D0 @@ -99,7 +99,7 @@ subroutine timing_tot do j = 1, N_det ! call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int) i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -146,7 +146,7 @@ subroutine timing_diag do i = 1, N_det do j = i,i i_count += 1.d0 - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) enddo enddo call wall_time(wall1) @@ -183,7 +183,7 @@ subroutine timing_single if(degree.ne.1)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo @@ -225,7 +225,7 @@ subroutine timing_double if(degree.ne.2)cycle i_count += 1.d0 call wall_time(wall0) - call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + 
call htilde_mu_mat_bi_ortho_slow(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) call wall_time(wall1) accu += wall1 - wall0 enddo diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f index ebd43a7a..b7de067f 100644 --- a/src/tc_bi_ortho/test_tc_fock.irp.f +++ b/src/tc_bi_ortho/test_tc_fock.irp.f @@ -25,8 +25,7 @@ subroutine test_3e implicit none double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu double precision :: hmono, htwoe, hthree, htot - call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) -! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree) + call htilde_mu_mat_bi_ortho_slow(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot) print*,'hmono = ',hmono print*,'htwoe = ',htwoe print*,'hthree= ',hthree @@ -88,7 +87,7 @@ subroutine routine_3() print*, ' excited det' call debug_det(det_i, N_int) - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) if(dabs(hthree).lt.1.d-10)cycle ref = hthree if(s1 == 1)then @@ -156,7 +155,7 @@ subroutine routine_tot() stop endif - call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) + call htilde_mu_mat_bi_ortho_slow(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij) print*,htilde_ij if(dabs(htilde_ij).lt.1.d-10)cycle print*, ' excited det' diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 85c8dac3..62adb068 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -16,6 +16,24 @@ doc: If |true|, three-body terms are included interface: ezfio,provider,ocaml default: True +[three_e_3_idx_term] +type: logical +doc: If |true|, the diagonal 3-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_4_idx_term] +type: logical +doc: If |true|, 
the off-diagonal 4-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + +[three_e_5_idx_term] +type: logical +doc: If |true|, the off-diagonal 5-idx terms of the 3-e interaction are taken +interface: ezfio,provider,ocaml +default: True + [pure_three_body_h_tc] type: logical doc: If |true|, pure triple excitation three-body terms are included @@ -124,6 +142,18 @@ doc: type of 1-body Jastrow interface: ezfio, provider, ocaml default: 0 +[mu_r_ct] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 6.203504908994001e-1 + +[beta_rho_power] +type: double precision +doc: a parameter used to define mu(r) +interface: ezfio, provider, ocaml +default: 0.5 + [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue @@ -170,7 +200,7 @@ default: 1.e-7 type: logical doc: If |true|, the integrals of the three-body jastrow are computed with cycles interface: ezfio,provider,ocaml -default: True +default: False [thresh_biorthog_diag] type: Threshold @@ -214,4 +244,9 @@ doc: If |true|, save the bi-ortho wave functions in a sorted way interface: ezfio,provider,ocaml default: True +[use_ipp] +type: logical +doc: If |true|, use Manu IPP +interface: ezfio,provider,ocaml +default: True diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f index 0b08f784..5d7d6b2e 100644 --- a/src/tc_scf/diis_tcscf.irp.f +++ b/src/tc_scf/diis_tcscf.irp.f @@ -87,22 +87,31 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] implicit none integer :: i, j + double precision :: t0, t1 double precision, allocatable :: tmp(:,:) double precision, allocatable :: F(:,:) + !print *, ' Providing FQS_SQF_ao ...' 
+ !call wall_time(t0) + allocate(F(ao_num,ao_num)) if(var_tc) then + do i = 1, ao_num do j = 1, ao_num F(j,i) = Fock_matrix_vartc_ao_tot(j,i) enddo enddo + else + + PROVIDE Fock_matrix_tc_ao_tot do i = 1, ao_num do j = 1, ao_num F(j,i) = Fock_matrix_tc_ao_tot(j,i) enddo enddo + endif allocate(tmp(ao_num,ao_num)) @@ -131,6 +140,9 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)] deallocate(tmp) deallocate(F) + !call wall_time(t1) + !print *, ' Wall time for FQS_SQF_ao =', t1-t0 + END_PROVIDER ! --- @@ -138,10 +150,20 @@ END_PROVIDER BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)] implicit none + double precision :: t0, t1 + + !print*, ' Providing FQS_SQF_mo ...' + !call wall_time(t0) + + PROVIDE mo_r_coef mo_l_coef + PROVIDE FQS_SQF_ao call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) & , FQS_SQF_mo, size(FQS_SQF_mo, 1) ) + !call wall_time(t1) + !print*, ' Wall time for FQS_SQF_mo =', t1-t0 + END_PROVIDER ! --- diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f index d8b962d7..3e624941 100644 --- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f +++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f @@ -4,17 +4,27 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] implicit none - integer :: a, b, i, j - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) !print *, ' PROVIDING fock_3e_uhf_mo_cs ...' 
- call wall_time(ti) + !call wall_time(ti) fock_3e_uhf_mo_cs = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_3e_uhf_mo_cs) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -28,19 +38,31 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - 2.d0 * I_bij_aji & - - 2.d0 * I_bij_iaj & - - 2.d0 * I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - 2.d0 * I_bij_aji & + - 2.d0 * I_bij_iaj & + - 2.d0 * I_bij_jia ) enddo enddo enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_cs(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti END_PROVIDER @@ -50,24 +72,38 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] BEGIN_DOC -! ALPHA part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! + ! ALPHA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! 
END_DOC + implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_3e_uhf_mo_cs - !print *, ' PROVIDING fock_3e_uhf_mo_a ...' - call wall_time(ti) + !print *, ' Providing fock_3e_uhf_mo_a ...' + !call wall_time(ti) o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_a) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -83,12 +119,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - 2.d0 * I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - 2.d0 * I_bij_jia ) enddo enddo @@ -105,12 +141,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - 2.d0 * I_bij_iaj & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - 2.d0 * I_bij_iaj & + - I_bij_jia ) enddo enddo @@ -127,12 +163,12 
@@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij & - + I_bij_ija & - + I_bij_jai & - - I_bij_aji & - - I_bij_iaj & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + + I_bij_ija & + + I_bij_jai & + - I_bij_aji & + - I_bij_iaj & + - I_bij_jia ) enddo enddo @@ -141,35 +177,58 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)] enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) - !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_a(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) + !print *, ' Wall time for fock_3e_uhf_mo_a =', tf - ti END_PROVIDER ! --- BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] + BEGIN_DOC -! BETA part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! BETA part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used END_DOC implicit none - integer :: a, b, i, j, o - double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia - double precision :: ti, tf + integer :: a, b, i, j, o + double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia + double precision :: ti, tf + double precision, allocatable :: tmp(:,:) PROVIDE mo_l_coef mo_r_coef !print *, ' PROVIDING fock_3e_uhf_mo_b ...' 
- call wall_time(ti) + !call wall_time(ti) o = elec_beta_num + 1 + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, I_bij_aij) fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (a, b, i, j, I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia, tmp) & + !$OMP SHARED (mo_num, o, elec_alpha_num, elec_beta_num, fock_3e_uhf_mo_b) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do a = 1, mo_num do b = 1, mo_num @@ -185,9 +244,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_iaj ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_iaj ) enddo enddo @@ -204,9 +263,9 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & - - I_bij_aji & - - I_bij_jia ) + tmp(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij & + - I_bij_aji & + - I_bij_jia ) enddo enddo @@ -223,8 +282,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj) call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia) - fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij & - - I_bij_aji ) + tmp(b,a) -= 0.5d0 * ( I_bij_aij & + - I_bij_aji ) enddo enddo @@ -233,8 +292,20 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)] enddo enddo + !$OMP END DO NOWAIT - call wall_time(tf) + !$OMP CRITICAL + do a = 1, mo_num + do b = 1, mo_num + fock_3e_uhf_mo_b(b,a) += tmp(b,a) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(tf) !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti 
END_PROVIDER @@ -267,15 +338,15 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] fock_3e_uhf_ao_a = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a) allocate(f_tmp(ao_num,ao_num)) f_tmp = 0.d0 - !$OMP DO + !$OMP DO do g = 1, ao_num do e = 1, ao_num dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) @@ -307,18 +378,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)] enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do mu = 1, ao_num do nu = 1, ao_num fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(f_tmp) - !$OMP END PARALLEL + !$OMP END PARALLEL call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti @@ -353,15 +424,15 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] fock_3e_uhf_ao_b = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & - !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & - !$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp, & + !$OMP i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) & + 
!$OMP SHARED (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b) allocate(f_tmp(ao_num,ao_num)) f_tmp = 0.d0 - !$OMP DO + !$OMP DO do g = 1, ao_num do e = 1, ao_num dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e) @@ -393,18 +464,18 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)] enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do mu = 1, ao_num do nu = 1, ao_num fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(f_tmp) - !$OMP END PARALLEL + !$OMP END PARALLEL call wall_time(tf) print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index 1d651c4e..0ae515bb 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -18,6 +18,8 @@ double precision :: density, density_a, density_b double precision :: t0, t1 + PROVIDE ao_two_e_tc_tot + !print*, ' providing two_e_tc_non_hermit_integral_seq ...' !call wall_time(t0) @@ -80,22 +82,26 @@ END_PROVIDER double precision :: t0, t1 double precision, allocatable :: tmp_a(:,:), tmp_b(:,:) - !print*, ' providing two_e_tc_non_hermit_integral ...' + PROVIDE ao_two_e_tc_tot + PROVIDE mo_l_coef mo_r_coef + PROVIDE TCSCF_density_matrix_ao_alpha TCSCF_density_matrix_ao_beta + + !print*, ' Providing two_e_tc_non_hermit_integral ...' 
!call wall_time(t0) two_e_tc_non_hermit_integral_alpha = 0.d0 two_e_tc_non_hermit_integral_beta = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & - !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & - !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli) & + !$OMP SHARED (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, & + !$OMP two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta) allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num)) tmp_a = 0.d0 tmp_b = 0.d0 - !$OMP DO + !$OMP DO do j = 1, ao_num do l = 1, ao_num density_a = TCSCF_density_matrix_ao_alpha(l,j) @@ -113,22 +119,22 @@ END_PROVIDER enddo enddo enddo - !$OMP END DO NOWAIT + !$OMP END DO NOWAIT - !$OMP CRITICAL + !$OMP CRITICAL do i = 1, ao_num do j = 1, ao_num two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i) two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i) enddo enddo - !$OMP END CRITICAL + !$OMP END CRITICAL deallocate(tmp_a, tmp_b) - !$OMP END PARALLEL + !$OMP END PARALLEL !call wall_time(t1) - !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0 + !print*, ' Wall time for two_e_tc_non_hermit_integral = ', t1 - t0 END_PROVIDER @@ -141,8 +147,15 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)] END_DOC implicit none + double precision :: t0, t1 - Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha + !print*, ' Providing Fock_matrix_tc_ao_alpha ...' 
+ !call wall_time(t0) + + Fock_matrix_tc_ao_alpha = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha + + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_ao_alpha =', t1-t0 END_PROVIDER @@ -169,8 +182,12 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] END_DOC implicit none + double precision :: t0, t1, tt0, tt1 double precision, allocatable :: tmp(:,:) + !print*, ' Providing Fock_matrix_tc_mo_alpha ...' + !call wall_time(t0) + if(bi_ortho) then !allocate(tmp(ao_num,ao_num)) @@ -181,19 +198,34 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ] !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1)) !deallocate(tmp) + PROVIDE mo_l_coef mo_r_coef + + !call wall_time(tt0) call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) + !call wall_time(tt1) + !print*, ' 2-e term:', tt1-tt0 + if(three_body_h_tc) then + !call wall_time(tt0) + !PROVIDE fock_a_tot_3e_bi_orth !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth + PROVIDE fock_3e_uhf_mo_a Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a + !call wall_time(tt1) + !print*, ' 3-e term:', tt1-tt0 endif else + call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) & , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) ) endif + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_mo_alpha =', t1-t0 + END_PROVIDER ! 
--- @@ -220,7 +252,9 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) & , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) ) if(three_body_h_tc) then + !PROVIDE fock_b_tot_3e_bi_orth !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth + PROVIDE fock_3e_uhf_mo_b Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b endif @@ -275,10 +309,20 @@ END_PROVIDER BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ] implicit none + double precision :: t0, t1 + + !print*, ' Providing Fock_matrix_tc_ao_tot ...' + !call wall_time(t0) + + PROVIDE mo_l_coef mo_r_coef + PROVIDE Fock_matrix_tc_mo_tot call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) & , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) ) + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_ao_tot =', t1-t0 + END_PROVIDER ! --- diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f index a03a0624..eb8973ff 100644 --- a/src/tc_scf/fock_tc_mo_tot.irp.f +++ b/src/tc_scf/fock_tc_mo_tot.irp.f @@ -1,107 +1,124 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ] &BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)] - implicit none - BEGIN_DOC - ! TC-Fock matrix on the MO basis. WARNING !!! NON HERMITIAN !!! - ! For open shells, the ROHF Fock Matrix is :: - ! - ! | F-K | F + K/2 | F | - ! |---------------------------------| - ! | F + K/2 | F | F - K/2 | - ! |---------------------------------| - ! | F | F - K/2 | F + K | - ! - ! - ! F = 1/2 (Fa + Fb) - ! - ! K = Fb - Fa - ! - END_DOC - integer :: i,j,n - if (elec_alpha_num == elec_beta_num) then - Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha - else - do j=1,elec_beta_num - ! F-K - do i=1,elec_beta_num !CC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + BEGIN_DOC + ! 
TC-Fock matrix on the MO basis. WARNING !!! NON HERMITIAN !!! + ! For open shells, the ROHF Fock Matrix is :: + ! + ! | F-K | F + K/2 | F | + ! |---------------------------------| + ! | F + K/2 | F | F - K/2 | + ! |---------------------------------| + ! | F | F - K/2 | F + K | + ! + ! + ! F = 1/2 (Fa + Fb) + ! + ! K = Fb - Fa + ! + END_DOC + + implicit none + integer :: i, j, n + double precision :: t0, t1 + + !print*, ' Providing Fock_matrix_tc_mo_tot ...' + !call wall_time(t0) + + if(elec_alpha_num == elec_beta_num) then + + PROVIDE Fock_matrix_tc_mo_alpha + + Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha + + else + + PROVIDE Fock_matrix_tc_mo_beta Fock_matrix_tc_mo_alpha + + do j = 1, elec_beta_num + ! F-K + do i = 1, elec_beta_num !CC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F+K/2 - do i=elec_beta_num+1,elec_alpha_num !CA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + enddo + ! F+K/2 + do i = elec_beta_num+1, elec_alpha_num !CA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F - do i=elec_alpha_num+1, mo_num !CV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - enddo + enddo + ! F + do i = elec_alpha_num+1, mo_num !CV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + enddo - do j=elec_beta_num+1,elec_alpha_num - ! F+K/2 - do i=1,elec_beta_num !AC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + do j = elec_beta_num+1, elec_alpha_num + ! 
F+K/2 + do i = 1, elec_beta_num !AC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F - do i=elec_beta_num+1,elec_alpha_num !AA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_alpha_num+1, mo_num !AV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + enddo + ! F + do i = elec_beta_num+1, elec_alpha_num !AA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + ! F-K/2 + do i = elec_alpha_num+1, mo_num !AV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - enddo + enddo + enddo - do j=elec_alpha_num+1, mo_num - ! F - do i=1,elec_beta_num !VC - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) - enddo - ! F-K/2 - do i=elec_beta_num+1,elec_alpha_num !VA - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& + do j = elec_alpha_num+1, mo_num + ! F + do i = 1, elec_beta_num !VC + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) + enddo + ! F-K/2 + do i = elec_beta_num+1, elec_alpha_num !VA + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))& - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - ! F+K - do i=elec_alpha_num+1,mo_num !VV - Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) & + enddo + ! 
F+K + do i = elec_alpha_num+1, mo_num !VV + Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) & + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j)) - enddo - enddo - if(three_body_h_tc)then + enddo + enddo + + if(three_body_h_tc) then + + PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth + ! C-O do j = 1, elec_beta_num - do i = elec_beta_num+1, elec_alpha_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_beta_num+1, elec_alpha_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo ! C-V do j = 1, elec_beta_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_alpha_num+1, mo_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo ! 
O-V do j = elec_beta_num+1, elec_alpha_num - do i = elec_alpha_num+1, mo_num - Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) - Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) - enddo + do i = elec_alpha_num+1, mo_num + Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j)) + Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i)) + enddo enddo - endif + endif - endif + endif - do i = 1, mo_num - Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i) - enddo + do i = 1, mo_num + Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i) + enddo if(frozen_orb_scf)then @@ -116,29 +133,33 @@ enddo endif - if(no_oa_or_av_opt)then - do i = 1, n_act_orb - iorb = list_act(i) - do j = 1, n_inact_orb - jorb = list_inact(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_virt_orb - jorb = list_virt(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - do j = 1, n_core_orb - jorb = list_core(j) - Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 - Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 - enddo - enddo - endif - if(.not.bi_ortho .and. three_body_h_tc)then - Fock_matrix_tc_mo_tot += fock_3_mat + if(no_oa_or_av_opt)then + do i = 1, n_act_orb + iorb = list_act(i) + do j = 1, n_inact_orb + jorb = list_inact(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + do j = 1, n_virt_orb + jorb = list_virt(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + do j = 1, n_core_orb + jorb = list_core(j) + Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0 + Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0 + enddo + enddo endif + if(.not.bi_ortho .and. 
three_body_h_tc)then + Fock_matrix_tc_mo_tot += fock_3_mat + endif + + !call wall_time(t1) + !print*, ' Wall time for Fock_matrix_tc_mo_tot =', t1-t0 + END_PROVIDER diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f index 7c776ce5..5d2f199c 100644 --- a/src/tc_scf/fock_three_bi_ortho.irp.f +++ b/src/tc_scf/fock_three_bi_ortho.irp.f @@ -4,14 +4,24 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] BEGIN_DOC -! Alpha part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! + ! Alpha part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + ! This calculation becomes the dominant part one the integrals are provided + ! END_DOC + implicit none - integer :: i, a + integer :: i, a + double precision :: t0, t1 + + !print*, ' Providing fock_a_tot_3e_bi_orth ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + PROVIDE fock_cs_3e_bi_orth fock_a_tmp1_bi_ortho fock_a_tmp2_bi_ortho fock_a_tot_3e_bi_orth = 0.d0 @@ -23,6 +33,9 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)] enddo enddo + !call wall_time(t1) + !print*, ' Wall time for fock_a_tot_3e_bi_orth =', t1-t0 + END_PROVIDER ! --- @@ -30,10 +43,15 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)] BEGIN_DOC -! Beta part of the Fock matrix from three-electron terms -! -! WARNING :: non hermitian if bi-ortho MOS used + ! + ! Beta part of the Fock matrix from three-electron terms + ! + ! WARNING :: non hermitian if bi-ortho MOS used + ! + ! This calculation becomes the dominant part one the integrals are provided + ! 
END_DOC + implicit none integer :: i, a @@ -56,15 +74,30 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss, contrib_sos, contrib_soo, contrib - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - double precision :: new + integer :: i, a, j, k + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_cs_3e_bi_orth ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, contrib) + fock_cs_3e_bi_orth = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, c_3_int, c_minus_3_int, exch_13_int, exch_23_int, exch_12_int, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, fock_cs_3e_bi_orth) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num @@ -85,16 +118,29 @@ BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! 
< a k j | k i j > : E_12 - new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int - - fock_cs_3e_bi_orth(a,i) += new + tmp(a,i) += 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do a = 1, mo_num + fock_cs_3e_bi_orth(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth + !call wall_time(t1) + !print*, ' Wall time for fock_cs_3e_bi_orth =', t1-t0 + END_PROVIDER ! --- @@ -102,20 +148,37 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss, contrib_sos, contrib_soo, contrib - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int - double precision :: new + integer :: i, a, j, k, ee + double precision :: contrib_sss, contrib_sos, contrib_soo, contrib + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_a_tmp1_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, contrib) + + ee = elec_beta_num + 1 fock_a_tmp1_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, c_3_int, c_minus_3_int, exch_13_int, exch_23_int, exch_12_int, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, elec_beta_num, ee, fock_a_tmp1_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num - - do j = elec_beta_num + 1, elec_alpha_num + + do j = ee, elec_alpha_num do k = 1, elec_beta_num + call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! 
< a k j | i k j > call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int) ! < a k j | j i k > call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i > @@ -123,14 +186,29 @@ BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)] call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12 - fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) + tmp(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do a = 1, mo_num + fock_a_tmp1_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho + !call wall_time(t1) + !print*, ' Wall time for fock_a_tmp1_bi_ortho =', t1-t0 + END_PROVIDER ! --- @@ -138,24 +216,56 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_sss + integer :: i, a, j, k, ee + double precision :: contrib_sss + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_a_tmp2_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! 
to PROVIDE stuffs + call contrib_3e_sss(1, 1, 1, 1, contrib_sss) + + ee = elec_beta_num + 1 fock_a_tmp2_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, contrib_sss, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, ee, fock_a_tmp2_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num do j = 1, elec_alpha_num - do k = elec_beta_num+1, elec_alpha_num + do k = ee, elec_alpha_num call contrib_3e_sss(a, i, j, k, contrib_sss) - fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss + tmp(a,i) += 0.5d0 * contrib_sss enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do a = 1, mo_num + fock_a_tmp2_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(t1) + !print*, ' Wall time for fock_a_tmp2_bi_ortho =', t1-t0 END_PROVIDER @@ -164,30 +274,61 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int - double precision :: new + integer :: i, a, j, k, ee + double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_b_tmp1_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! 
to PROVIDE stuffs + call give_integrals_3_body_bi_ort(1, 1, 1, 1, 1, 1, direct_int) + + ee = elec_beta_num + 1 fock_b_tmp1_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, direct_int, exch_13_int, exch_23_int, tmp) & + !$OMP SHARED (mo_num, elec_beta_num, elec_alpha_num, ee, fock_b_tmp1_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num do j = 1, elec_beta_num - do k = elec_beta_num+1, elec_alpha_num + do k = ee, elec_alpha_num call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j > call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23 - fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int + tmp(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do a = 1, mo_num + fock_b_tmp1_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho + !call wall_time(t1) + !print*, ' Wall time for fock_b_tmp1_bi_ortho =', t1-t0 + END_PROVIDER ! --- @@ -195,24 +336,56 @@ END_PROVIDER BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)] implicit none - integer :: i, a, j, k - double precision :: contrib_soo + integer :: i, a, j, k, ee + double precision :: contrib_soo + double precision :: t0, t1 + double precision, allocatable :: tmp(:,:) + + !print*, ' Providing fock_b_tmp2_bi_ortho ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + ! 
to PROVIDE stuffs + call contrib_3e_soo(1, 1, 1, 1, contrib_soo) + + ee = elec_beta_num + 1 fock_b_tmp2_bi_ortho = 0.d0 + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (i, a, j, k, contrib_soo, tmp) & + !$OMP SHARED (mo_num, elec_alpha_num, ee, fock_b_tmp2_bi_ortho) + + allocate(tmp(mo_num,mo_num)) + tmp = 0.d0 + + !$OMP DO do i = 1, mo_num do a = 1, mo_num - do j = elec_beta_num + 1, elec_alpha_num + do j = ee, elec_alpha_num do k = 1, elec_alpha_num call contrib_3e_soo(a, i, j, k, contrib_soo) - fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo + tmp(a,i) += 0.5d0 * contrib_soo enddo enddo enddo enddo + !$OMP END DO NOWAIT + + !$OMP CRITICAL + do i = 1, mo_num + do a = 1, mo_num + fock_b_tmp2_bi_ortho(a,i) += tmp(a,i) + enddo + enddo + !$OMP END CRITICAL + + deallocate(tmp) + !$OMP END PARALLEL + + !call wall_time(t1) + !print*, ' Wall time for fock_b_tmp2_bi_ortho =', t1-t0 END_PROVIDER diff --git a/src/tc_scf/minimize_tc_angles.irp.f b/src/tc_scf/minimize_tc_angles.irp.f index 1363e62b..5d3ff7f0 100644 --- a/src/tc_scf/minimize_tc_angles.irp.f +++ b/src/tc_scf/minimize_tc_angles.irp.f @@ -8,6 +8,10 @@ program print_angles my_n_pt_a_grid = 50 touch my_n_pt_r_grid my_n_pt_a_grid ! call sort_by_tc_fock + + ! TODO + ! check if rotations of orbitals affect the TC energy + ! and refuse the step call minimize_tc_orb_angles end diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f index 645742c8..20260a95 100644 --- a/src/tc_scf/rh_tcscf_diis.irp.f +++ b/src/tc_scf/rh_tcscf_diis.irp.f @@ -1,5 +1,10 @@ ! --- +! TODO +! level shift of SCF is well adapted +! for 0.5 x F +! + subroutine rh_tcscf_diis() implicit none @@ -231,8 +236,7 @@ subroutine rh_tcscf_diis() ! --- print *, ' TCSCF DIIS converged !' 
- call print_energy_and_mos() - + !call print_energy_and_mos(good_angles) call write_time(6) deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS) diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f index 30798e3d..0b79e8ea 100644 --- a/src/tc_scf/rh_tcscf_simple.irp.f +++ b/src/tc_scf/rh_tcscf_simple.irp.f @@ -119,7 +119,7 @@ subroutine rh_tcscf_simple() endif print *, ' TCSCF Simple converged !' - call print_energy_and_mos() + !call print_energy_and_mos(good_angles) deallocate(rho_old, rho_new) diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index 31999c18..2567faf0 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -17,7 +17,8 @@ program rotate_tcscf_orbitals bi_ortho = .True. touch bi_ortho - call maximize_overlap() + call minimize_tc_orb_angles() + !call maximize_overlap() end diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 3c12118f..755c35b9 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -46,6 +46,7 @@ end subroutine LTxR ! --- subroutine minimize_tc_orb_angles() + BEGIN_DOC ! routine that minimizes the angle between left- and right-orbitals when degeneracies are found END_DOC @@ -58,7 +59,7 @@ subroutine minimize_tc_orb_angles() good_angles = .False. thr_deg = thr_degen_tc - call print_energy_and_mos() + call print_energy_and_mos(good_angles) print *, ' Minimizing the angles between the TC orbitals' i = 1 @@ -77,7 +78,7 @@ subroutine minimize_tc_orb_angles() print *, ' Converged ANGLES MINIMIZATION !!' 
call print_angles_tc() - call print_energy_and_mos() + call print_energy_and_mos(good_angles) end @@ -92,14 +93,22 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) integer :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst double precision :: max_angle, norm + double precision :: E_old, E_new, E_thr integer, allocatable :: list_degen(:,:) double precision, allocatable :: new_angles(:) + double precision, allocatable :: mo_r_coef_old(:,:), mo_l_coef_old(:,:) double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:) double precision, allocatable :: mo_r_coef_new(:,:) - double precision, allocatable :: fock_diag(:),s_mat(:,:) + double precision, allocatable :: fock_diag(:), s_mat(:,:) double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) + E_thr = 1d-8 + E_old = TC_HF_energy + allocate(mo_l_coef_old(ao_num,mo_num), mo_r_coef_old(ao_num,mo_num)) + mo_r_coef_old = mo_r_coef + mo_l_coef_old = mo_l_coef + good_angles = .False. allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) @@ -252,11 +261,32 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef + ! check if TC energy has changed + E_new = TC_HF_energy + if(dabs(E_new - E_old) .gt. 
E_thr) then + mo_r_coef = mo_r_coef_old + mo_l_coef = mo_l_coef_old + deallocate(mo_l_coef_old, mo_r_coef_old) + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + TOUCH mo_l_coef mo_r_coef + print*, ' TC energy bef rotation = ', E_old + print*, ' TC energy aft rotation = ', E_new + print*, ' the rotation is refused' + stop + endif + allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) max_angle = maxval(new_angles) good_angles = max_angle.lt.45.d0 print *, ' max_angle = ', max_angle + deallocate(new_angles) + + + deallocate(mo_l_coef_old, mo_r_coef_old) + deallocate(mo_l_coef_good, mo_r_coef_good) + deallocate(mo_r_coef_new) end @@ -356,22 +386,26 @@ end ! --- -subroutine print_energy_and_mos() +subroutine print_energy_and_mos(good_angles) implicit none - integer :: i + logical, intent(out) :: good_angles + integer :: i print *, ' ' - print *, ' TC energy = ', TC_HF_energy + print *, ' TC energy = ', TC_HF_energy print *, ' TC SCF energy gradient = ', grad_non_hermit print *, ' Max angle Left/right = ', max_angle_left_right if(max_angle_left_right .lt. 45.d0) then print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' + good_angles = .true. else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + good_angles = .false. else if(max_angle_left_right .gt. 75.d0) then print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' + good_angles = .false. 
endif print *, ' Diag Fock elem, product of left/right norm, angle left/right ' diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 85389f30..88ddd26c 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -54,7 +54,6 @@ program tc_scf endif call minimize_tc_orb_angles() - call print_energy_and_mos() endif diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f index c3de0322..833b48aa 100644 --- a/src/tc_scf/tc_scf_energy.irp.f +++ b/src/tc_scf/tc_scf_energy.irp.f @@ -1,5 +1,5 @@ - BEGIN_PROVIDER [ double precision, TC_HF_energy] + BEGIN_PROVIDER [ double precision, TC_HF_energy ] &BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy] &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy] @@ -8,9 +8,14 @@ END_DOC implicit none - integer :: i, j + integer :: i, j + double precision :: t0, t1 + + !print*, ' Providing TC energy ...' + !call wall_time(t0) PROVIDE mo_l_coef mo_r_coef + PROVIDE two_e_tc_non_hermit_integral_alpha two_e_tc_non_hermit_integral_beta TC_HF_energy = nuclear_repulsion TC_HF_one_e_energy = 0.d0 @@ -28,6 +33,9 @@ TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy TC_HF_energy += diag_three_elem_hf + !call wall_time(t1) + !print*, ' Wall time for TC energy=', t1-t0 + END_PROVIDER ! --- diff --git a/src/utils/cgtos_one_e.irp.f b/src/utils/cgtos_one_e.irp.f new file mode 100644 index 00000000..43ca8224 --- /dev/null +++ b/src/utils/cgtos_one_e.irp.f @@ -0,0 +1,120 @@ + +! --- + +complex*16 function overlap_cgaussian_x(A_center, B_center, alpha, beta, power_A, power_B, dim) + + BEGIN_DOC + ! + ! \int_{-infty}^{+infty} (x-A_x)^ax (x-B_x)^bx exp(-alpha (x-A_x)^2) exp(- beta(x-B_X)^2) dx + ! with complex arguments + ! 
+ END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A, power_B + complex*16, intent(in) :: A_center, B_center, alpha, beta + + integer :: i, iorder_p + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim), P_center, fact_p, p, inv_sq_p + + complex*16 :: Fc_integral + + + call give_explicit_cpoly_and_cgaussian_x( P_new, P_center, p, fact_p, iorder_p & + , alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 1.d-14) then + overlap_cgaussian_x = (0.d0, 0.d0) + return + endif + + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + + overlap_cgaussian_x = (0.d0, 0.d0) + do i = 0, iorder_p + overlap_cgaussian_x += P_new(i) * Fc_integral(i, inv_sq_p) + enddo + + overlap_cgaussian_x *= fact_p + +end function overlap_cgaussian_x + +! --- + +subroutine overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B & + , overlap_x, overlap_y, overlap_z, overlap, dim ) + + BEGIN_DOC + ! + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx + ! S = S_x S_y S_z + ! for complex arguments + ! + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, power_A(3), power_B(3) + complex*16, intent(in) :: A_center(3), B_center(3), alpha, beta + complex*16, intent(out) :: overlap_x, overlap_y, overlap_z, overlap + + integer :: i, nmax, iorder_p(3) + double precision :: fact_p_mod + complex*16 :: P_new(0:max_dim,3), P_center(3), fact_p, p, inv_sq_p + complex*16 :: F_integral_tab(0:max_dim) + + complex*16 :: Fc_integral + + call give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + + fact_p_mod = dsqrt(real(fact_p)*real(fact_p) + aimag(fact_p)*aimag(fact_p)) + if(fact_p_mod .lt. 
1.d-14) then + overlap_x = (1.d-10, 0.d0) + overlap_y = (1.d-10, 0.d0) + overlap_z = (1.d-10, 0.d0) + overlap = (1.d-10, 0.d0) + return + endif + + nmax = maxval(iorder_p) + + inv_sq_p = (1.d0, 0.d0) / zsqrt(p) + do i = 0, nmax + F_integral_tab(i) = Fc_integral(i, inv_sq_p) + enddo + + overlap_x = P_new(0,1) * F_integral_tab(0) + overlap_y = P_new(0,2) * F_integral_tab(0) + overlap_z = P_new(0,3) * F_integral_tab(0) + + do i = 1, iorder_p(1) + overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(1), beta, B_center(1), fact_p, p, P_center(1)) + overlap_x *= fact_p + + do i = 1, iorder_p(2) + overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(2), beta, B_center(2), fact_p, p, P_center(2)) + overlap_y *= fact_p + + do i = 1, iorder_p(3) + overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i) + enddo + call cgaussian_product_x(alpha, A_center(3), beta, B_center(3), fact_p, p, P_center(3)) + overlap_z *= fact_p + + overlap = overlap_x * overlap_y * overlap_z + +end subroutine overlap_cgaussian_xyz + +! --- + + diff --git a/src/utils/cgtos_utils.irp.f b/src/utils/cgtos_utils.irp.f new file mode 100644 index 00000000..a820d5f2 --- /dev/null +++ b/src/utils/cgtos_utils.irp.f @@ -0,0 +1,780 @@ + +! --- + +subroutine give_explicit_cpoly_and_cgaussian_x(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transform the product of + ! (x-x_A)^a (x-x_B)^b exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! 
fact_k \sum_{i=0}^{iorder} (x-x_P)^i exp(-p(r-P)^2) + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim + integer, intent(in) :: a, b + complex*16, intent(in) :: alpha, beta, A_center, B_center + integer, intent(out) :: iorder + complex*16, intent(out) :: p, P_center, fact_k + complex*16, intent(out) :: P_new(0:max_dim) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim), P_b(0:max_dim) + complex*16 :: p_inv, ab, d_AB, tmp + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + P_new = (0.d0, 0.d0) + + ! new exponent + p = alpha + beta + + ! new center + p_inv = (1.d0, 0.d0) / p + ab = alpha * beta + P_center = (alpha * A_center + beta * B_center) * p_inv + + ! get the factor + d_AB = (A_center - B_center) * (A_center - B_center) + tmp = ab * p_inv * d_AB + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 50.d0) then + fact_k = zexp(-tmp) + else + fact_k = (0.d0, 0.d0) + endif + + ! Recenter the polynomials P_a and P_b on P_center + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0), A_center, P_center, a, P_b(0), B_center, P_center, b) + n_new = 0 + + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0), a, P_b(0), b, P_new(0), n_new) + iorder = a + b + +end subroutine give_explicit_cpoly_and_cgaussian_x + +! --- + +subroutine give_explicit_cpoly_and_cgaussian(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim) + + BEGIN_DOC + ! Transforms the product of + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! into + ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) + ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) + ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) + ! + ! WARNING ::: IF fact_k is too smal then: + ! 
returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + END_DOC + + implicit none + include 'constants.include.F' + + integer, intent(in) :: dim, a(3), b(3) + complex*16, intent(in) :: alpha, beta, A_center(3), B_center(3) + integer, intent(out) :: iorder(3) + complex*16, intent(out) :: p, P_center(3), fact_k, P_new(0:max_dim,3) + + integer :: n_new, i, j + double precision :: tmp_mod + complex*16 :: P_a(0:max_dim,3), P_b(0:max_dim,3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b + + iorder(1) = 0 + iorder(2) = 0 + iorder(3) = 0 + + P_new(0,1) = (0.d0, 0.d0) + P_new(0,2) = (0.d0, 0.d0) + P_new(0,3) = (0.d0, 0.d0) + + !DIR$ FORCEINLINE + call cgaussian_product(alpha, A_center, beta, B_center, fact_k, p, P_center) + + ! IF fact_k is too smal then: returns a "s" function centered in zero + ! with an inifinite exponent and a zero polynom coef + tmp_mod = dsqrt(REAL(fact_k)*REAL(fact_k) + AIMAG(fact_k)*AIMAG(fact_k)) + if(tmp_mod < 1d-14) then + iorder = 0 + p = (1.d+14, 0.d0) + fact_k = (0.d0 , 0.d0) + P_new(0:max_dim,1:3) = (0.d0 , 0.d0) + P_center(1:3) = (0.d0 , 0.d0) + return + endif + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,1), A_center(1), P_center(1), a(1), P_b(0,1), B_center(1), P_center(1), b(1)) + iorder(1) = a(1) + b(1) + do i = 0, iorder(1) + P_new(i,1) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,1), a(1), P_b(0,1), b(1), P_new(0,1), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,2), A_center(2), P_center(2), a(2), P_b(0,2), B_center(2), P_center(2), b(2)) + iorder(2) = a(2) + b(2) + do i = 0, iorder(2) + P_new(i,2) = 0.d0 + enddo + n_new = 0 + !DIR$ FORCEINLINE + call multiply_cpoly(P_a(0,2), a(2), P_b(0,2), b(2), P_new(0,2), n_new) + + !DIR$ FORCEINLINE + call recentered_cpoly2(P_a(0,3), A_center(3), P_center(3), a(3), P_b(0,3), B_center(3), P_center(3), b(3)) + iorder(3) = a(3) + b(3) + do i = 0, iorder(3) + P_new(i,3) = 0.d0 + enddo + n_new = 0 + !DIR$ 
FORCEINLINE + call multiply_cpoly(P_a(0,3), a(3), P_b(0,3), b(3), P_new(0,3), n_new) + +end subroutine give_explicit_cpoly_and_cgaussian + +! --- + +!subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) +! BEGIN_DOC +! ! Transforms the product of +! ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) +! ! exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) exp(-(r-Nucl_center)^2 gama +! ! +! ! into +! ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) +! ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) +! ! * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 ) +! END_DOC +! implicit none +! include 'constants.include.F' +! integer, intent(in) :: dim +! integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) +! double precision, intent(in) :: alpha, beta, gama ! exponents +! double precision, intent(in) :: A_center(3) ! A center +! double precision, intent(in) :: B_center (3) ! B center +! double precision, intent(in) :: Nucl_center(3) ! B center +! double precision, intent(out) :: P_center(3) ! new center +! double precision, intent(out) :: p ! new exponent +! double precision, intent(out) :: fact_k ! constant factor +! double precision, intent(out) :: P_new(0:max_dim,3)! polynomial +! integer , intent(out) :: iorder(3) ! i_order(i) = order of the polynomials +! +! double precision :: P_center_tmp(3) ! new center +! double precision :: p_tmp ! new exponent +! double precision :: fact_k_tmp,fact_k_bis ! constant factor +! double precision :: P_new_tmp(0:max_dim,3)! polynomial +! integer :: i,j +! double precision :: binom_func +! +! ! First you transform the two primitives into a sum of primitive with the same center P_center_tmp and gaussian exponent p_tmp +! 
call give_explicit_cpoly_and_cgaussian(P_new_tmp,P_center_tmp,p_tmp,fact_k_tmp,iorder,alpha,beta,a,b,A_center,B_center,dim) +! ! Then you create the new gaussian from the product of the new one per the Nuclei one +! call cgaussian_product(p_tmp,P_center_tmp,gama,Nucl_center,fact_k_bis,p,P_center) +! fact_k = fact_k_bis * fact_k_tmp +! +! ! Then you build the coefficient of the new polynom +! do i = 0, iorder(1) +! P_new(i,1) = 0.d0 +! do j = i,iorder(1) +! P_new(i,1) = P_new(i,1) + P_new_tmp(j,1) * binom_func(j,j-i) * (P_center(1) - P_center_tmp(1))**(j-i) +! enddo +! enddo +! do i = 0, iorder(2) +! P_new(i,2) = 0.d0 +! do j = i,iorder(2) +! P_new(i,2) = P_new(i,2) + P_new_tmp(j,2) * binom_func(j,j-i) * (P_center(2) - P_center_tmp(2))**(j-i) +! enddo +! enddo +! do i = 0, iorder(3) +! P_new(i,3) = 0.d0 +! do j = i,iorder(3) +! P_new(i,3) = P_new(i,3) + P_new_tmp(j,3) * binom_func(j,j-i) * (P_center(3) - P_center_tmp(3))**(j-i) +! enddo +! enddo +! +!end + +! --- + +subroutine cgaussian_product(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product + ! e^{-a (r-r_A)^2} e^{-b (r-r_B)^2} = k e^{-p (r-r_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa(3), xb(3) + complex*16, intent(out) :: p, k, xp(3) + + double precision :: tmp_mod + complex*16 :: p_inv, xab(3), ab + + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new exponent + p = a + b + + xab(1) = xa(1) - xb(1) + xab(2) = xa(2) - xb(2) + xab(3) = xa(3) - xb(3) + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * (xab(1)*xab(1) + xab(2)*xab(2) + xab(3)*xab(3)) + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod .gt. 40.d0) then + k = (0.d0, 0.d0) + xp(1:3) = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp(1) = ( a * xa(1) + b * xb(1) ) * p_inv + xp(2) = ( a * xa(2) + b * xb(2) ) * p_inv + xp(3) = ( a * xa(3) + b * xb(3) ) * p_inv + +end subroutine cgaussian_product + +! 
--- + +subroutine cgaussian_product_x(a, xa, b, xb, k, p, xp) + + BEGIN_DOC + ! complex Gaussian product in 1D. + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K e^{-p (x-x_P)^2} + END_DOC + + implicit none + complex*16, intent(in) :: a, b, xa, xb + complex*16, intent(out) :: p, k, xp + + double precision :: tmp_mod + complex*16 :: p_inv + complex*16 :: xab, ab + + ASSERT (REAL(a) > 0.) + ASSERT (REAL(b) > 0.) + + ! new center + p = a + b + + xab = xa - xb + + p_inv = (1.d0, 0.d0) / p + ab = a * b * p_inv + + k = ab * xab*xab + tmp_mod = dsqrt(REAL(k)*REAL(k) + AIMAG(k)*AIMAG(k)) + if(tmp_mod > 40.d0) then + k = (0.d0, 0.d0) + xp = (0.d0, 0.d0) + return + endif + + k = zexp(-k) + xp = (a*xa + b*xb) * p_inv + +end subroutine cgaussian_product_x + +! --- + +subroutine multiply_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Multiply two complex polynomials + ! D(t) += B(t) * C(t) + END_DOC + + implicit none + + integer, intent(in) :: nb, nc + complex*16, intent(in) :: b(0:nb), c(0:nc) + complex*16, intent(inout) :: d(0:nb+nc) + integer, intent(out) :: nd + + integer :: ndtmp, ib, ic + double precision :: tmp_mod + complex*16 :: tmp + + if(ior(nc, nb) >= 0) then ! True if nc>=0 and nb>=0 + continue + else + return + endif + + ndtmp = nb + nc + + do ic = 0, nc + d(ic) = d(ic) + c(ic) * b(0) + enddo + + do ib = 1, nb + d(ib) = d(ib) + c(0) * b(ib) + do ic = 1, nc + d(ib+ic) = d(ib+ic) + c(ic) * b(ib) + enddo + enddo + + do nd = ndtmp, 0, -1 + tmp = d(nd) + tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp)) + if(tmp_mod .lt. 1.d-15) cycle + exit + enddo + +end subroutine multiply_cpoly + +! --- + +subroutine add_cpoly(b, nb, c, nc, d, nd) + + BEGIN_DOC + ! Add two complex polynomials + ! 
D(t) += B(t) + C(t)
+  END_DOC
+
+  implicit none
+  complex*16, intent(in)    :: b(0:nb), c(0:nc)
+  integer,    intent(inout) :: nb, nc
+  integer,    intent(out)   :: nd
+  complex*16, intent(out)   :: d(0:nb+nc)
+
+  integer          :: ib
+  double precision :: tmp_mod
+  complex*16       :: tmp
+
+  nd = nb + nc
+  do ib = 0, max(nb, nc)
+    d(ib) = d(ib) + c(ib) + b(ib)
+  enddo
+
+  tmp     = d(nd)
+  tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp))
+  do while( (tmp_mod .lt. 1.d-15) .and. (nd >= 0) )
+    nd -= 1
+    if(nd < 0) exit
+    tmp     = d(nd)
+    tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp))
+  enddo
+
+end subroutine add_cpoly
+
+! ---
+
+subroutine add_cpoly_multiply(b, nb, cst, d, nd)
+
+  BEGIN_DOC
+  ! Add a complex polynomial multiplied by a complex constant
+  ! D(t) += cst * B(t)
+  END_DOC
+
+  implicit none
+
+  integer,    intent(in)    :: nb
+  complex*16, intent(in)    :: b(0:nb), cst
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max(nb, nd))
+
+  integer          :: ib
+  double precision :: tmp_mod
+  complex*16       :: tmp
+
+  nd = max(nd, nb)
+  if(nd /= -1) then
+
+    do ib = 0, nb
+      d(ib) = d(ib) + cst * b(ib)
+    enddo
+
+    tmp     = d(nd)
+    tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp))
+    do while(tmp_mod .lt. 1.d-15)
+      nd -= 1
+      if(nd < 0) exit
+      tmp     = d(nd)
+      tmp_mod = dsqrt(REAL(tmp)*REAL(tmp) + AIMAG(tmp)*AIMAG(tmp))
+    enddo
+
+  endif
+
+end subroutine add_cpoly_multiply
+
+! ---
+
+subroutine recentered_cpoly2(P_A, x_A, x_P, a, P_B, x_B, x_Q, b)
+
+  BEGIN_DOC
+  !
+  ! write two complex polynomials (x-x_A)^a (x-x_B)^b
+  ! as P_A(x-x_P) and P_B(x-x_Q)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,    intent(in)  :: a, b
+  complex*16, intent(in)  :: x_A, x_P, x_B, x_Q
+  complex*16, intent(out) :: P_A(0:a), P_B(0:b)
+
+  integer    :: i, minab, maxab
+  complex*16 :: pows_a(-2:a+b+4), pows_b(-2:a+b+4)
+
+  double precision :: binom_func
+
+  if((a<0) .or. 
(b<0)) return
+
+  maxab = max(a, b)
+  minab = max(min(a, b), 0)
+
+  pows_a(0) = (1.d0, 0.d0)
+  pows_a(1) = x_P - x_A
+
+  pows_b(0) = (1.d0, 0.d0)
+  pows_b(1) = x_Q - x_B
+
+  do i = 2, maxab
+    pows_a(i) = pows_a(i-1) * pows_a(1)
+    pows_b(i) = pows_b(i-1) * pows_b(1)
+  enddo
+
+  P_A(0) = pows_a(a)
+  P_B(0) = pows_b(b)
+
+  do i = 1, min(minab, 20)
+    P_A(i) = binom_transp(a-i,a) * pows_a(a-i)
+    P_B(i) = binom_transp(b-i,b) * pows_b(b-i)
+  enddo
+
+  do i = minab+1, min(a, 20)
+    P_A(i) = binom_transp(a-i,a) * pows_a(a-i)
+  enddo
+  do i = minab+1, min(b, 20)
+    P_B(i) = binom_transp(b-i,b) * pows_b(b-i)
+  enddo
+
+  do i = 21, a
+    P_A(i) = binom_func(a,a-i) * pows_a(a-i)
+  enddo
+  do i = 21, b
+    P_B(i) = binom_func(b,b-i) * pows_b(b-i)
+  enddo
+
+end subroutine recentered_cpoly2
+
+! ---
+
+complex*16 function Fc_integral(n, inv_sq_p)
+
+  BEGIN_DOC
+  ! function that calculates the following integral
+  ! \int_{\-infty}^{+\infty} x^n \exp(-p x^2) dx
+  ! for complex valued p
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+
+  integer,    intent(in) :: n
+  complex*16, intent(in) :: inv_sq_p
+
+  ! (n)!
+  double precision :: fact
+
+  if(n < 0) then
+    Fc_integral = (0.d0, 0.d0)
+    return
+  endif
+
+  ! odd n
+  if(iand(n, 1) .ne. 0) then
+    Fc_integral = (0.d0, 0.d0)
+    return
+  endif
+
+  if(n == 0) then
+    Fc_integral = sqpi * inv_sq_p
+    return
+  endif
+
+  Fc_integral = sqpi * 0.5d0**n * inv_sq_p**dble(n+1) * fact(n) / fact(shiftr(n, 1))
+
+end function Fc_integral
+
+! ---
+
+complex*16 function crint(n, rho)
+
+  implicit none
+  include 'constants.include.F'
+
+  integer,    intent(in) :: n
+  complex*16, intent(in) :: rho
+
+  integer          :: i, mmax
+  double precision :: rho_mod, rho_re, rho_im
+  double precision :: sq_rho_re, sq_rho_im
+  double precision :: n_tmp
+  complex*16       :: sq_rho, rho_inv, rho_exp
+
+  complex*16       :: crint_smallz, cpx_erf
+
+  rho_re  = REAL (rho)
+  rho_im  = AIMAG(rho)
+  rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im)
+
+  if(rho_mod < 10.d0) then
+    ! 
small z + + if(rho_mod .lt. 1.d-10) then + crint = 1.d0 / dble(n + n + 1) + else + crint = crint_smallz(n, rho) + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + + n_tmp = dble(n) + 0.5d0 + crint = 0.5d0 * gamma(n_tmp) / (rho**n_tmp) + + else + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + crint = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + mmax = n + if(mmax .gt. 0) then + do i = 0, mmax-1 + crint = ((dble(i) + 0.5d0) * crint - rho_exp) * rho_inv + enddo + endif + + ! *** + + endif + + endif + +! print *, n, real(rho), real(crint) + +end function crint + +! --- + +complex*16 function crint_sum(n_pt_out, rho, d1) + + implicit none + include 'constants.include.F' + + integer, intent(in) :: n_pt_out + complex*16, intent(in) :: rho, d1(0:n_pt_out) + + integer :: n, i, mmax + double precision :: rho_mod, rho_re, rho_im + double precision :: sq_rho_re, sq_rho_im + complex*16 :: sq_rho, F0 + complex*16 :: rho_tmp, rho_inv, rho_exp + complex*16, allocatable :: Fm(:) + + complex*16 :: crint_smallz, cpx_erf + + rho_re = REAL (rho) + rho_im = AIMAG(rho) + rho_mod = dsqrt(rho_re*rho_re + rho_im*rho_im) + + if(rho_mod < 10.d0) then + ! small z + + if(rho_mod .lt. 1.d-10) then + +! print *, ' 111' +! print *, ' rho = ', rho + + crint_sum = d1(0) +! print *, 0, 1 + + do i = 2, n_pt_out, 2 + + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) / dble(n+n+1) + +! print *, n, 1.d0 / dble(n+n+1) + enddo + + ! *** + + else + +! print *, ' 222' +! print *, ' rho = ', real(rho) +! if(abs(aimag(rho)) .gt. 1d-15) then +! print *, ' complex rho', rho +! stop +! endif + + crint_sum = d1(0) * crint_smallz(0, rho) + +! print *, 0, real(d1(0)), real(crint_smallz(0, rho)) +! if(abs(aimag(d1(0))) .gt. 1d-15) then +! print *, ' complex d1(0)', d1(0) +! stop +! 
endif + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + d1(i) * crint_smallz(n, rho) + +! print *, n, real(d1(i)), real(crint_smallz(n, rho)) +! if(abs(aimag(d1(i))) .gt. 1d-15) then +! print *, ' complex d1(i)', i, d1(i) +! stop +! endif + + enddo + +! print *, 'sum = ', real(crint_sum) +! if(abs(aimag(crint_sum)) .gt. 1d-15) then +! print *, ' complex crint_sum', crint_sum +! stop +! endif + + ! *** + + endif + + else + ! large z + + if(rho_mod .gt. 40.d0) then + +! print *, ' 333' +! print *, ' rho = ', rho + + rho_inv = (1.d0, 0.d0) / rho + rho_tmp = 0.5d0 * sqpi * zsqrt(rho_inv) + crint_sum = rho_tmp * d1(0) +! print *, 0, rho_tmp + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + rho_tmp = rho_tmp * (dble(n) + 0.5d0) * rho_inv + crint_sum = crint_sum + rho_tmp * d1(i) +! print *, n, rho_tmp + enddo + + ! *** + + else + +! print *, ' 444' +! print *, ' rho = ', rho + + ! get \sqrt(rho) + sq_rho_re = sq_op5 * dsqrt(rho_re + rho_mod) + sq_rho_im = 0.5d0 * rho_im / sq_rho_re + sq_rho = sq_rho_re + (0.d0, 1.d0) * sq_rho_im + !sq_rho = zsqrt(rho) + + + F0 = 0.5d0 * sqpi * cpx_erf(sq_rho_re, sq_rho_im) / sq_rho + crint_sum = F0 * d1(0) +! print *, 0, F0 + + rho_exp = 0.5d0 * zexp(-rho) + rho_inv = (1.d0, 0.d0) / rho + + mmax = shiftr(n_pt_out, 1) + if(mmax .gt. 0) then + + allocate( Fm(mmax) ) + Fm(1:mmax) = (0.d0, 0.d0) + + do n = 0, mmax-1 + F0 = ((dble(n) + 0.5d0) * F0 - rho_exp) * rho_inv + Fm(n+1) = F0 +! print *, n, F0 + enddo + + do i = 2, n_pt_out, 2 + n = shiftr(i, 1) + crint_sum = crint_sum + Fm(n) * d1(i) + enddo + deallocate(Fm) + endif + + ! *** + + endif + + endif + +end function crint_sum + +! --- + +complex*16 function crint_smallz(n, rho) + + BEGIN_DOC + ! 
Standard version of rint + END_DOC + + implicit none + integer, intent(in) :: n + complex*16, intent(in) :: rho + + integer, parameter :: kmax = 40 + double precision, parameter :: eps = 1.d-13 + + integer :: k + double precision :: delta_mod + complex*16 :: rho_k, ct, delta_k + + ct = 0.5d0 * zexp(-rho) * gamma(dble(n) + 0.5d0) + rho_k = (1.d0, 0.d0) + crint_smallz = ct * rho_k / gamma(dble(n) + 1.5d0) + + do k = 1, kmax + + rho_k = rho_k * rho + delta_k = ct * rho_k / gamma(dble(n+k) + 1.5d0) + crint_smallz = crint_smallz + delta_k + + delta_mod = dsqrt(REAL(delta_k)*REAL(delta_k) + AIMAG(delta_k)*AIMAG(delta_k)) + if(delta_mod .lt. eps) return + enddo + + if(delta_mod > eps) then + write(*,*) ' pb in crint_smallz !' + write(*,*) ' n, rho = ', n, rho + write(*,*) ' delta_mod = ', delta_mod + stop 1 + endif + +end function crint_smallz + +! --- + diff --git a/src/utils/cpx_erf.irp.f b/src/utils/cpx_erf.irp.f new file mode 100644 index 00000000..61f81055 --- /dev/null +++ b/src/utils/cpx_erf.irp.f @@ -0,0 +1,204 @@ + +! --- + +complex*16 function cpx_erf(x, y) + + BEGIN_DOC + ! + ! compute erf(z) for z = x + i y + ! + ! REF: Abramowitz and Stegun + ! + END_DOC + + implicit none + + double precision, intent(in) :: x, y + + double precision :: yabs + complex*16 :: erf_tmp1, erf_tmp2, erf_tmp3, erf_tot + + double precision :: erf_F + complex*16 :: erf_E, erf_G, erf_H + + yabs = dabs(y) + + if(yabs .lt. 1.d-15) then + + cpx_erf = (1.d0, 0.d0) * derf(x) + return + + else + + erf_tmp1 = (1.d0, 0.d0) * derf(x) + erf_tmp2 = erf_E(x, yabs) + erf_F(x, yabs) + erf_tmp3 = zexp(-(0.d0, 2.d0) * x * yabs) * ( erf_G(x, yabs) + erf_H(x, yabs) ) + erf_tot = erf_tmp1 + erf_tmp2 - erf_tmp3 + + endif + + if(y .gt. 0.d0) then + cpx_erf = erf_tot + else + cpx_erf = CONJG(erf_tot) + endif + +end function cpx_erf + +! --- + +complex*16 function erf_E(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + if( (dabs(x).gt.6.d0) .or. 
(x==0.d0) ) then + erf_E = (0.d0, 0.d0) + return + endif + + if(dabs(x) .lt. 1.d-7) then + + erf_E = -inv_pi * (0.d0, 1.d0) * yabs + + else + + erf_E = 0.5d0 * inv_pi * dexp(-x*x) & + * ((1.d0, 0.d0) - zexp(-(2.d0, 0.d0) * x * yabs)) / x + + endif + +end function erf_E + +! --- + +double precision function erf_F(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp1, tmp2, x2, ct + + + if(dabs(x) .gt. 5.8d0) then + + erf_F = 0.d0 + + else + + x2 = x * x + ct = x * inv_pi + + erf_F = 0.d0 + do i = 1, Nmax + + tmp1 = 0.25d0 * dble(i) * dble(i) + x2 + tmp2 = dexp(-tmp1) / tmp1 + erf_F = erf_F + tmp2 + + if(dabs(tmp2) .lt. 1d-15) exit + enddo + erf_F = ct * erf_F + + endif + +end function erf_F + +! --- + +complex*16 function erf_G(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i, tmpi, imin, imax + double precision :: tmp0, tmp1, x2, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_G = (0.d0, 0.d0) + return + endif + + tmpi = int(2.d0 * yabs) + imin = max(1, tmpi-Nmax) + imax = tmpi + Nmax + + x2 = x * x + + erf_G = 0.d0 + do i = imin, imax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp( idble * yabs - tmp1 - dlog(tmp1) - dlog_2pi) * (x - (0.d0, 1.d0)*tmp0) + + erf_G = erf_G + tmp2 + + enddo + +end function erf_G + +! --- + +complex*16 function erf_H(x, yabs) + + implicit none + include 'constants.include.F' + + double precision, intent(in) :: x, yabs + + integer, parameter :: Nmax = 13 + + integer :: i + double precision :: tmp0, tmp1, tmp_mod, x2, ct, idble + complex*16 :: tmp2 + + if(x .eq. 0.d0) then + erf_H = (0.d0, 0.d0) + return + endif + + + if( (dabs(x) .lt. 10d0) .and. (yabs .lt. 
6.1d0) ) then + + x2 = x * x + ct = 0.5d0 * inv_pi + + erf_H = 0.d0 + do i = 1, Nmax + + idble = dble(i) + tmp0 = 0.5d0 * idble + tmp1 = tmp0 * tmp0 + x2 + tmp2 = dexp(-tmp1-idble*yabs) * (x + (0.d0, 1.d0)*tmp0) / tmp1 + erf_H = erf_H + tmp2 + + tmp_mod = dsqrt(REAL(tmp2)*REAL(tmp2) + AIMAG(tmp2)*AIMAG(tmp2)) + if(tmp_mod .lt. 1d-15) exit + enddo + erf_H = ct * erf_H + + else + + erf_H = (0.d0, 0.d0) + + endif + +end function erf_H + +! --- + + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index c8a36775..b60e3bc1 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -133,7 +133,7 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, io BEGIN_DOC ! Transforms the product of - ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) + ! (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) ! into ! fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 ) ! * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 ) @@ -427,6 +427,46 @@ subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) end subroutine +!- + +subroutine gaussian_product_x_v(a,xa,b,xb,k,p,xp,n_points) + implicit none + BEGIN_DOC + ! Gaussian product in 1D with multiple xa + ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + END_DOC + + integer, intent(in) :: n_points + double precision , intent(in) :: a,b ! Exponents + double precision , intent(in) :: xa(n_points),xb ! Centers + double precision , intent(out) :: p(n_points) ! New exponent + double precision , intent(out) :: xp(n_points) ! New center + double precision , intent(out) :: k(n_points) ! Constant + + double precision :: p_inv + integer :: ipoint + + ASSERT (a>0.) + ASSERT (b>0.) 
+ + double precision :: xab, ab + + p = a+b + p_inv = 1.d0/(a+b) + ab = a*b*p_inv + do ipoint = 1, n_points + xab = xa(ipoint)-xb + k(ipoint) = ab*xab*xab + if (k(ipoint) > 40.d0) then + k(ipoint)=0.d0 + cycle + endif + k(ipoint) = exp(-k(ipoint)) + xp(ipoint) = (a*xa(ipoint)+b*xb)*p_inv + enddo +end subroutine + + subroutine multiply_poly_0c(b,c,nc,d,nd) implicit none @@ -599,8 +639,10 @@ subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points) enddo enddo enddo + end + subroutine add_poly(b,nb,c,nc,d,nd) implicit none BEGIN_DOC @@ -1134,3 +1176,94 @@ double precision function rint1(n,rho) write(*,*)'pb in rint1 k too large!' stop 1 end + +! --- + +double precision function V_phi(n, m) + + BEGIN_DOC + ! Computes the angular $\phi$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{2 \pi} \cos(\phi)^n \sin(\phi)^m d\phi$. + END_DOC + + implicit none + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod/ (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_phi = 4.d0 * prod * Wallis(m) + +end function V_phi + +! --- + +double precision function V_theta(n, m) + + BEGIN_DOC + ! Computes the angular $\theta$ part of the nuclear attraction integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n \sin(\theta)^m d\theta$ + END_DOC + + implicit none + include 'utils/constants.include.F' + integer, intent(in) :: n, m + + integer :: i + double precision :: prod + + double precision :: Wallis + + V_theta = 0.d0 + prod = 1.d0 + do i = 0, shiftr(n, 1)-1 + prod = prod / (1.d0 + dfloat(m+1)/dfloat(n-i-i-1)) + enddo + V_theta = (prod + prod) * Wallis(m) + +end function V_theta + +! --- + +double precision function Wallis(n) + + BEGIN_DOC + ! Wallis integral: + ! + ! $\int_{0}^{\pi} \cos(\theta)^n d\theta$. + END_DOC + + implicit none + include 'utils/constants.include.F' + + integer, intent(in) :: n + + integer :: p + + double precision :: fact + + if(iand(n, 1) .eq. 
0) then + + Wallis = fact(shiftr(n, 1)) + Wallis = pi * fact(n) / (dble(ibset(0_8, n)) * (Wallis + Wallis) * Wallis) + + else + + p = shiftr(n, 1) + Wallis = fact(p) + Wallis = dble(ibset(0_8, p+p)) * Wallis * Wallis / fact(p+p+1) + + endif + +end function Wallis + +! --- + diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index 081adee3..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! 
center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -93,40 +96,47 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& end +! --- + +subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, lower_exp_val, dx, nx) -subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,lower_exp_val,dx,nx) - implicit none BEGIN_DOC ! .. math :: ! ! \int_{-infty}^{+infty} (x-A_center)^(power_A) * (x-B_center)^power_B * exp(-alpha(x-A_center)^2) * exp(-beta(x-B_center)^2) dx ! 
END_DOC - integer :: i,j,k,l - integer,intent(in) :: power_A,power_B - double precision, intent(in) :: lower_exp_val - double precision,intent(in) :: A_center, B_center,alpha,beta - double precision, intent(out) :: overlap_x,dx - integer, intent(in) :: nx - double precision :: x_min,x_max,domain,x,factor,dist,p,p_inv,rho - double precision :: P_center - if(power_A.lt.0.or.power_B.lt.0)then + + implicit none + + integer, intent(in) :: power_A, power_B, nx + double precision, intent(in) :: lower_exp_val, A_center, B_center, alpha, beta + double precision, intent(out) :: overlap_x, dx + + integer :: i, j, k, l + double precision :: x_min, x_max, domain, x, factor, dist, p, p_inv, rho + double precision :: P_center + double precision :: tmp + + if(power_A.lt.0 .or. power_B.lt.0) then overlap_x = 0.d0 dx = 0.d0 return endif - p = alpha + beta - p_inv= 1.d0/p - rho = alpha * beta * p_inv - dist = (A_center - B_center)*(A_center - B_center) + + p = alpha + beta + p_inv = 1.d0/p + rho = alpha * beta * p_inv + dist = (A_center - B_center)*(A_center - B_center) P_center = (alpha * A_center + beta * B_center) * p_inv - if(rho*dist.gt.80.d0)then + + if(rho*dist.gt.80.d0) then overlap_x= 0.d0 return endif + factor = dexp(-rho * dist) - double precision :: tmp tmp = dsqrt(lower_exp_val/p) x_min = P_center - tmp @@ -143,7 +153,7 @@ subroutine overlap_x_abs(A_center,B_center,alpha,beta,power_A,power_B,overlap_x, overlap_x = factor * dx * overlap_x end - +! 
--- subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) @@ -173,7 +183,7 @@ subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, powe double precision :: F_integral double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) - ldp = maxval(power_A(1:3) + power_B(1:3)) + ldp = maxval( power_A(1:3) + power_B(1:3) ) allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index 41e7cad6..aba99c2b 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -460,6 +460,33 @@ subroutine v2_over_x(v,x,res) end +! --- + +subroutine check_sym(A, n) + + implicit none + integer, intent(in) :: n + double precision, intent(in) :: A(n,n) + integer :: i, j + double precision :: dev_sym, norm, tmp + + dev_sym = 0.d0 + norm = 0.d0 + do i = 1, n + do j = i+1, n + tmp = A(j,i) - A(i,j) + dev_sym += tmp * tmp + norm += A(j,i) * A(j,i) + enddo + enddo + + print*, ' deviation from sym = ', dev_sym + print*, ' norm = ', norm + +end subroutine check_sym + +! --- + subroutine sum_A_At(A, N) !BEGIN_DOC