From 92a4e33f8a21717cab0c0e4f8412ed6903afb04a Mon Sep 17 00:00:00 2001
From: Abdallah Ammar <abd.ammar.phys@gmail.com>
Date: Mon, 16 Jan 2023 23:36:05 +0100
Subject: [PATCH]  clean fork with + TC stuffs

---
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    |   67 +-
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   |  518 +++++++++
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |   49 +-
 .../grad2_jmu_modif_vect.irp.f                |   22 +-
 .../grad_lapl_jmu_manu.irp.f                  |  369 ++++++
 .../grad_lapl_jmu_modif.irp.f                 |   62 +-
 .../grad_related_ints.irp.f                   |   46 +-
 src/ao_many_one_e_ints/list_grid.irp.f        |   59 +
 src/ao_many_one_e_ints/listj1b.irp.f          |   15 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  191 ++++
 .../prim_int_gauss_gauss.irp.f                |   99 +-
 src/ao_tc_eff_map/fit_j.irp.f                 |  125 ++
 src/ao_tc_eff_map/potential.irp.f             |  165 ++-
 src/bi_ort_ints/semi_num_ints_mo.irp.f        |  112 +-
 src/bi_ort_ints/three_body_ints_bi_ort.irp.f  |   43 +-
 src/bi_ortho_mos/bi_density.irp.f             |   57 +-
 src/bi_ortho_mos/mos_rl.irp.f                 |   47 +
 src/dft_utils_in_r/ao_in_r.irp.f              |   41 +
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      |  155 +++
 src/hartree_fock/fock_matrix_hf.irp.f         |   25 +-
 src/hartree_fock/scf.irp.f                    |   19 +-
 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f |  195 +++-
 src/non_h_ints_mu/grad_squared.irp.f          |  122 +-
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  221 ++++
 src/non_h_ints_mu/j12_nucl_utils.irp.f        |   17 +
 src/non_h_ints_mu/new_grad_tc.irp.f           |  284 ++++-
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      |  174 +++
 src/non_h_ints_mu/total_tc_int.irp.f          |   51 +-
 src/non_hermit_dav/biorthog.irp.f             |   62 +-
 .../lapack_diag_non_hermit.irp.f              |  174 +--
 src/non_hermit_dav/new_routines.irp.f         |   53 +-
 src/scf_utils/diagonalize_fock.irp.f          |   11 +-
 src/scf_utils/diis.irp.f                      |  179 +++
 src/scf_utils/fock_matrix.irp.f               |    2 +
 src/scf_utils/rh_scf_simple.irp.f             |  129 +++
 src/scf_utils/roothaan_hall_scf.irp.f         |   33 +-
 .../save_bitcpsileft_for_qmcchem.irp.f        |   35 +-
 src/tc_bi_ortho/tc_som.irp.f                  |   70 ++
 src/tc_bi_ortho/test_tc_fock.irp.f            |   84 +-
 src/tc_keywords/EZFIO.cfg                     |   52 +-
 src/tc_scf/diago_bi_ort_tcfock.irp.f          |  105 +-
 src/tc_scf/diis_tcscf.irp.f                   |  186 +++
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f         |  405 +++++++
 src/tc_scf/fock_tc.irp.f                      |  295 +++--
 src/tc_scf/fock_tc_mo_tot.irp.f               |   23 +
 src/tc_scf/fock_three.irp.f                   |  106 +-
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f  |  400 ++++---
 src/tc_scf/rh_tcscf_diis.irp.f                |  362 ++++++
 src/tc_scf/rh_tcscf_simple.irp.f              |  129 +++
 src/tc_scf/rotate_tcscf_orbitals.irp.f        |    8 +-
 src/tc_scf/routines_rotates.irp.f             |   12 +-
 src/tc_scf/tc_scf.irp.f                       |  166 +--
 src/tc_scf/tc_scf_dm.irp.f                    |   42 +-
 src/tc_scf/tc_scf_energy.irp.f                |   14 +-
 src/tc_scf/tc_scf_utils.irp.f                 |    1 +
 src/tc_scf/test_int.irp.f                     | 1008 +++++++++++++++++
 src/tools/print_he_energy.irp.f               |    4 +-
 57 files changed, 6528 insertions(+), 972 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b_sorted.irp.f
 create mode 100644 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f
 create mode 100644 src/scf_utils/rh_scf_simple.irp.f
 create mode 100644 src/tc_bi_ortho/tc_som.irp.f
 create mode 100644 src/tc_scf/diis_tcscf.irp.f
 create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_diis.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_simple.irp.f
 create mode 100644 src/tc_scf/test_int.irp.f

diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..d2115d9e 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -156,6 +156,53 @@ end function overlap_gauss_r12_ao
 
 ! --
 
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
 subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
 
   BEGIN_DOC
@@ -177,7 +224,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_
   double precision, allocatable :: analytical_j(:)
 
   resv(:) = 0.d0
-  if(ao_overlap_abs(j,i).lt.1.d-12) then
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
     return
   endif
 
@@ -313,9 +360,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   ASSERT(beta .gt. 0.d0)
 
   if(beta .lt. 1d-10) then
-
     call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
-
     return
   endif
 
@@ -332,19 +377,20 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
   A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
-  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
 
   bg  = beta  * gama_inv
   dg  = delta * gama_inv
   bdg = bg * delta 
-  do ipoint=1,n_points
+
+  do ipoint = 1, n_points
+
     G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
     G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
     G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
-    fact_g(ipoint) = bdg * ( &
-          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
-        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
-        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
 
     if(fact_g(ipoint) < 10d0) then
       fact_g(ipoint) = dexp(-fact_g(ipoint))
@@ -368,8 +414,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
       do ipoint = 1, n_points
         coef12f = coef12 * fact_g(ipoint)
         resv(ipoint) += coef12f * analytical_j(ipoint)
-      end do
-
+      enddo
     enddo
   enddo
 
diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..4dd87a60
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,518 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+  double precision              :: int_gauss, dsqpi_3_2, int_j1b
+  double precision              :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                              &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,                                     &
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,                   &
+     !$OMP          final_grid_points_transp, ng_fit_jast,                                                  &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,                                             &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,                                           &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,              &
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+ do ipoint = 1, n_points_final_grid
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+   do i = 1, ao_num
+     do j = i, ao_num
+       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+         cycle
+       endif
+  
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+  
+         do i_fit = 1, ng_fit_jast
+  
+           expo_fit = expo_gauss_1_erf_x_2(i_fit)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+  
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+           int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+  
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+  
+         enddo
+        enddo
+       enddo
+     enddo
+   enddo
+
+   !$OMP END DO
+   !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+
+ double precision :: int_j1b
+ big_array(:,:,:) = 0.d0
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, big_array,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+!
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+       do i_fit = 1, ng_fit_jast
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+ deallocate(int_fit_v)
+ !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+
+  print*, ' providing int2_u2_j1b2_test ...'
+
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+          
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+          
+            ! ---
+          
+              int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          
+              tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+
+  print*, ' providing int2_u_grad1u_x_j1b2_test ...'
+
+  sq_pi_3_2 = dacos(-1.D0)**(1.d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+    
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+    
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+
+  print*, ' providing int2_u_grad1u_j1b2_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index b7fe234f..8196614f 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -19,9 +19,11 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_grad1u2_grad2u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_grad1u2_grad2u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -51,7 +53,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += -0.25d0 * coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -88,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -111,9 +113,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -143,7 +147,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -186,7 +190,7 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
@@ -202,9 +206,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_x_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_x_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -241,7 +247,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
           tmp_x += coef_fit * int_fit(1)
           tmp_y += coef_fit * int_fit(2)
           tmp_z += coef_fit * int_fit(3)
-          if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
 
           ! ---
 
@@ -265,7 +271,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -278,9 +284,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
         enddo
 
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -290,15 +296,15 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+  print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -320,9 +326,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
   double precision              :: wall0, wall1
   double precision, external    :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -351,7 +359,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +383,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
+
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
             tmp += coef_tmp * int_fit
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
index 6d3931f5..21927371 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
@@ -241,7 +241,7 @@
 !
 !! ---
 !
-!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 !
 !  BEGIN_DOC
 !  !
@@ -308,7 +308,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1)
 !
 !          if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then
 !            i_mask_grid1 += 1
@@ -320,7 +320,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2)
 !
 !          if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then
 !            i_mask_grid2 += 1
@@ -332,7 +332,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3)
 !
 !          if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then
 !            i_mask_grid3 += 1
@@ -408,15 +408,15 @@
 !          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid)
 !
 !          do ipoint = 1, i_mask_grid1
-!            int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid2
-!            int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid3
-!            int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
 !          enddo
 !
 !        enddo
@@ -439,15 +439,15 @@
 !  do ipoint = 1, n_points_final_grid
 !    do i = 2, ao_num
 !      do j = 1, i-1
-!        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
 !      enddo
 !    enddo
 !  enddo
 !
 !  call wall_time(wall1)
-!  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+!  print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0
 !
 !END_PROVIDER
 !
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..a6a55810
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,369 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R| - 1) / |r - R|
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+
+  print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! TODO :: cycle on the 1 - erf(mur12)
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)       & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle 
+!          endif
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+
+  print*, ' providing v_ij_u_cst_mu_j1b_test ...'
+
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
index 6a662533..fc30cd83 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -17,9 +17,11 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
   double precision           :: wall0, wall1
   double precision, external :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print *, ' providing v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                         &
@@ -49,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
 
         int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
         int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-        if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
 
         tmp += coef * (int_mu - int_coulomb)
 
@@ -99,51 +101,23 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_
   ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
   END_DOC
 
-  implicit none
-  integer          :: i, j, ipoint
-  double precision :: wall0, wall1
-
-  call wall_time(wall0)
-
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
-  END_DOC
-
   implicit none
   integer          :: i, j, ipoint, i_1s
   double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
-  x_v_ij_erf_rk_cst_mu_tmp_j1b = 0.d0
+  x_v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
  !$OMP          tmp_x, tmp_y, tmp_z)                                                  & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
  !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
- !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b, mu_erf)
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b, mu_erf)
  !$OMP DO
   !do ipoint = 1, 10
   do ipoint = 1, n_points_final_grid
@@ -169,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
-        if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
 
         tmp_x += coef * (ints(1) - ints_coulomb(1))
         tmp_y += coef * (ints(2) - ints_coulomb(2))
@@ -195,9 +169,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
 
         ! ---
 
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -207,15 +181,15 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b', wall1 - wall0
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -239,9 +213,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
 
   double precision, external :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing v_ij_u_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_u_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -277,7 +253,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
           B_center(3) = List_all_comb_b2_cent(3,1)
 
           int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
 
           tmp += coef * coef_fit * int_fit
 
diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f
index 67fb0fe7..8624e7b8 100644
--- a/src/ao_many_one_e_ints/grad_related_ints.irp.f
+++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f
@@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
 
   double precision :: NAI_pol_mult_erf_ao
 
+  print*, ' providing v_ij_erf_rk_cst_mu ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -54,7 +56,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
   enddo
  
   call wall_time(wall1)
-  print*, ' wall time for v_ij_erf_rk_cst_mu  ', wall1 - wall0
+  print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -73,6 +75,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   double precision :: wall0, wall1
   double precision :: NAI_pol_mult_erf_ao
 
+  print *, ' providing v_ij_erf_rk_cst_mu_transp ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -107,7 +111,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for v_ij_erf_rk_cst_mu_transp  ', wall1 - wall0
+  print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -124,6 +128,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -162,13 +168,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_points_final_grid,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -178,6 +184,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -191,7 +199,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -207,6 +215,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -220,13 +230,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid,ao_num, ao_num,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -236,6 +246,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -249,7 +261,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -268,7 +280,9 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  double precision :: r(3), ints(3), ints_coulomb(3)
  double precision :: wall0, wall1
 
- call wall_time(wall0)
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
  !$OMP DEFAULT (NONE)                           &
@@ -295,7 +309,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -315,6 +329,8 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
   do i = 1, ao_num
     do j = 1, ao_num
@@ -327,7 +343,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -348,6 +364,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -375,7 +393,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -395,6 +413,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -408,7 +428,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   enddo
  
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..ccdc33ad
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,dist,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, thr,fact_gauss, alpha, center,dist,sigma,center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl)
+ !$OMP DO
+ do i = 1, ao_num
+! do i = 3,3
+  do j = 1, ao_num
+! do i = 22,22
+!  do j = 9,9
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+!   if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    dist  = (center_ij(1) - r(1))*(center_ij(1) - r(1))
+    dist += (center_ij(2) - r(2))*(center_ij(2) - r(2))
+    dist += (center_ij(3) - r(3))*(center_ij(3) - r(3))
+    dist = dsqrt(dist)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+!    print*,''
+!    print*,j,i,ao_overlap_abs_grid(j,i),ao_overlap_abs(j,i)
+!    print*,r
+!    print*,dist,sigma
+!    print*,fact_gauss
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+!    print*,overlap
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list) 
+END_PROVIDER 
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
index 0b40170c..e27bf723 100644
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -102,6 +102,12 @@ END_PROVIDER
     List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i))
   enddo
 
+  print *, ' coeff, expo & cent of list b2'
+  do i = 1, List_all_comb_b2_size
+    print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i)
+    print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i)
+  enddo
+
 END_PROVIDER
 
 ! ---
@@ -168,7 +174,6 @@ END_PROVIDER
 
     do j = 1, nucl_num
       tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-      !print*, List_all_comb_b3(j,i), j1b_pen(j)
       List_all_comb_b3_expo(i)   += tmp_alphaj
       List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
       List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
@@ -220,9 +225,11 @@ END_PROVIDER
     List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i))
   enddo
 
-  print *, ' 1st coeff & expo of lists'
-  print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1)
-  print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1)
+  print *, ' coeff, expo & cent of list b3'
+  do i = 1, List_all_comb_b3_size
+    print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i)
+    print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i)
+  enddo
 
 END_PROVIDER
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..bf493fbb
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,191 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b2_size = 0
+ do i = 1, ao_num
+  do j = i, ao_num
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-15)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b2_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num
+  do j = 1, i-1
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
+  enddo
+ enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
+ enddo
+ max_List_comb_thr_b2_size = maxval(list) 
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b2_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-12)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_thr_b2_size(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+    enddo
+  enddo
+ enddo
+ 
+END_PROVIDER 
+
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ List_comb_thr_b3_size = 0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b3_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
+!  enddo
+! enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
+ enddo
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b3_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_thr_b3_size(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
+ 
+END_PROVIDER 
+
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..54c2d95b 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -1,5 +1,9 @@
-double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math                      ::
@@ -8,6 +12,72 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   !
   END_DOC
 
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100
+  thr  = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      coefy  = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy) .lt. thr) cycle
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        coefz   = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz) .lt. thr) cycle
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
+        accu += coefxyz * overlap
+      enddo
+    enddo
+  enddo
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
   implicit none
   include 'constants.include.F'
   double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
@@ -21,20 +91,23 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
   double precision               :: alpha_new         ! new exponent
   double precision               :: fact_a_new        ! constant factor
-  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
   integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
-  dim1=100
-  thr = 1.d-10
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
   call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
-    coefx = A_new(lx,1)
-    if(dabs(coefx).lt.thr)cycle
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
     iorder_tmp(1) = lx
     do ly = 0, iorder_a_new(2)
       coefy = A_new(ly,2)
@@ -46,12 +119,14 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
         coefxyz = coefxy * coefz
         if(dabs(coefxyz).lt.thr)cycle
         iorder_tmp(3) = lz
-        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
-        accu += coefxyz * overlap
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
       enddo
     enddo
   enddo
-  overlap_gauss_r12 = fact_a_new * accu
+  overlap_abs_gauss_r12= accu
 end
 
 !---
@@ -95,11 +170,9 @@ subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_
 
   maxab = maxval(power_A(1:3))
 
-  allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
 
-  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new,   &
-        alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, &
-        D_center, LD_D, A_center, n_points)
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
 
   rvec(:) = 0.d0
 
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index 8fad9079..4730d003 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -1,5 +1,40 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
 ! ---
 
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent of Gaussian in order to obtain an upper bound of J(r12,mu)
+ !
+ ! Can be used to scree integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
  implicit none
  BEGIN_DOC
@@ -88,6 +123,36 @@ END_PROVIDER
       expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0  , -0.06548959d0  , -0.03539446d0  , -0.17150646d0  , -0.15071096d0  , -0.11326834d0   /)
+    expo_gauss_j_mu_x = (/ 9.88572565d+02,  1.21363371d+04,  3.69794870d+01,  1.67364529d+02,  3.03962934d+00,  1.27854005d+00,  9.76383343d+00 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0  , -0.02394051d0 , -0.00913353d0 , -0.01285182d0  /)
+    expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x = (/ /)
+  !  expo_gauss_j_mu_x = (/ /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -189,6 +254,36 @@ END_PROVIDER
       expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.05202849d0  , 0.01031081d0  , 0.04699157d0  , 0.01451002d0  , 0.07442576d0  , 0.02692033d0  , 0.09311842d0   /)
+    expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.00942115d0  , 0.07332421d0  , 0.0508308d0   , 0.08204949d0  , 0.0404099d0   , 0.03201288d0  , 0.01911313d0  , 0.01114732d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x_2 = (/  /)
+  !  expo_gauss_j_mu_x_2 = (/  /)
+  !  
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -293,6 +388,36 @@ END_PROVIDER
       expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0  , -0.07419098d0  , -0.022202d0    , -0.12242283d0  , -0.04177571d0  , -0.16983107d0  /)
+    expo_gauss_j_mu_1_erf = (/ 2.74057056d+00,  1.37626591d+04,  6.65578663d+01,  1.34693031d+03,  1.90547699d+01,  2.69445390d+02,  6.31845879d+00/)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0  , -0.02923298d0 , -0.01381381d0 , -0.01675923d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_1_erf = (/  /)
+  !  expo_gauss_j_mu_1_erf = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
index 67d572e5..5b72b567 100644
--- a/src/ao_tc_eff_map/potential.irp.f
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -1,59 +1,79 @@
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot]
- implicit none
- BEGIN_DOC
-! number of gaussians to represent the effective potential :
-!
-! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
-!
-! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot = n_max_fit_slat + 1
+
+  BEGIN_DOC
+  ! number of gaussians to represent the effective potential :
+  !
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  !
+  ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+
+  n_gauss_eff_pot = ng_fit_jast + 1
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv]
- implicit none
- BEGIN_DOC
-! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot_deriv = n_max_fit_slat 
+
+  BEGIN_DOC
+  ! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  n_gauss_eff_pot_deriv = ng_fit_jast
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)]
 &BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)]
- implicit none
- BEGIN_DOC
-! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
-!
-! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
-!
-! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- include 'constants.include.F'
 
- integer :: i
- ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
- do i = 1, n_max_fit_slat
-  expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
-  coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
- enddo
- ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
- expo_gauss_eff_pot(n_max_fit_slat+1) = mu_erf * mu_erf
- coef_gauss_eff_pot(n_max_fit_slat+1) =  1.d0 * mu_erf * inv_sq_pi
+  BEGIN_DOC
+  ! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
+  !
+  ! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
+  !
+  ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer :: i
+ 
+  ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
+  do i = 1, ng_fit_jast
+   expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
+   coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
+  enddo
+
+  ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
+  expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf
+  coef_gauss_eff_pot(ng_fit_jast+1) =  1.d0 * mu_erf * inv_sq_pi
 
 END_PROVIDER 
 
+! ---
+
+double precision function eff_pot_gauss(x, mu)
+
+  BEGIN_DOC
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
+  END_DOC
+
+  implicit none
+  double precision, intent(in) :: x, mu
+
+  eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 
-double precision function eff_pot_gauss(x,mu)
- implicit none
- BEGIN_DOC
- ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
- END_DOC
- double precision, intent(in) :: x,mu
- eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 end
 
-
-
 ! -------------------------------------------------------------------------------------------------
 ! ---
 
@@ -129,16 +149,19 @@ END_PROVIDER
 ! ---
 
 double precision function fit_1_erf_x(x)
- implicit none
- double precision, intent(in) :: x
- BEGIN_DOC
-! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
- END_DOC
- integer :: i
- fit_1_erf_x = 0.d0
- do i = 1, n_max_fit_slat
-  fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
- enddo
+
+  BEGIN_DOC
+  ! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
+  END_DOC
+
+  implicit none
+  integer :: i
+  double precision, intent(in) :: x
+
+  fit_1_erf_x = 0.d0
+  do i = 1, n_max_fit_slat
+    fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
+  enddo
 
 end
 
@@ -165,7 +188,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -175,7 +198,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -185,7 +208,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -195,7 +218,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -205,10 +228,40 @@ end
     expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.0213619d0   , 0.03221511d0  , 0.29966689d0  , 0.19178934d0  , 0.06154732d0  , 0.28214555d0  , 0.11125985d0   /)
+    expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.28189124d0  , 0.19518669d0  , 0.12161735d0  , 0.24257438d0  , 0.07309656d0  , 0.042435d0    , 0.01926109d0  , 0.02393415d0   /)
+    expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_1_erf_x_2 = (/  /)
+  !  expo_gauss_1_erf_x_2 = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 33f512cf..4694a998 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -107,50 +107,69 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing int2_grad1_u12_ao_transp ...'
   call wall_time(wall0)
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
+
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  endif
   call wall_time(wall1)
   print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
 
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
 
   implicit none
   integer :: ipoint
+  double precision :: wall0, wall1
 
- print*,'providing int2_grad1_u12_bimo_transp'
- double precision :: wall0, wall1
- call wall_time(wall0)
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
- !$OMP DO SCHEDULE (dynamic)
-  do ipoint = 1, n_points_final_grid
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
- call wall_time(wall1)
- print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+
+  call wall_time(wall0)
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ipoint) & 
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
  implicit none
  integer          :: i, j, ipoint
@@ -165,35 +184,22 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3
  enddo
 END_PROVIDER 
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo, (3, mo_num, mo_num, n_points_final_grid)]
+! ---
 
-  BEGIN_DOC
-  !
-  ! int2_grad1_u12_bimo(:,k,i,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \chi_k(r2) \phi_i(r2) 
-  !
-  END_DOC
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
 
   implicit none
-  integer :: ipoint
-  print*,'Wrong !!'
-  stop
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao,int2_grad1_u12_bimo)
- !$OMP DO SCHEDULE (dynamic)
+  integer :: i, j, ipoint
+
   do ipoint = 1, n_points_final_grid
-
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (1,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(1,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (2,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(2,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (3,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(3,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-
+    do i = 1, ao_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1)
+        int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2)
+        int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3)
+      enddo                                  
+    enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
 
 END_PROVIDER 
 
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
index c1c27f06..48fa84f7 100644
--- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -15,7 +15,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
  character*(128)  :: name_file 
 
   three_body_ints_bi_ort = 0.d0
-  print*,'Providing the three_body_ints_bi_ort ...'
+  print *, ' Providing the three_body_ints_bi_ort ...'
   call wall_time(wall0)
   name_file = 'six_index_tensor'
 
@@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -104,12 +104,11 @@ end subroutine give_integrals_3_body_bi_ort
 
 ! ---
 
-
 subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -170,3 +169,39 @@ end subroutine give_integrals_3_body_bi_ort_old
 
 ! ---
 
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
index 947be870..90fe9634 100644
--- a/src/bi_ortho_mos/bi_density.irp.f
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -2,47 +2,66 @@
 ! ---
 
 BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
   call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0               &
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1) )
-  if( elec_alpha_num==elec_beta_num ) then
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1))
+
+  if(elec_alpha_num==elec_beta_num) then
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha
   else
-    ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
+    ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta
   endif
+
 END_PROVIDER
 
 ! ---
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
index 034a436e..d51999fc 100644
--- a/src/bi_ortho_mos/mos_rl.irp.f
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -37,6 +37,52 @@ end subroutine ao_to_mo_bi_ortho
 
 ! ---
 
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  !
+  ! mo_l_coef.T x     A_ao   x mo_r_coef = A_mo
+  ! mo_l_coef.T x ao_overlap x mo_r_coef =  I
+  !
+  ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:)
+
+  ! ao_overlap x mo_r_coef
+  allocate( tmp_1(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo
+  allocate( tmp_2(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
+            , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
+            , 0.d0, tmp_2, size(tmp_2, 1) )
+  
+  ! ao_overlap x mo_l_coef
+  tmp_1 = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
+            , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
+            , 0.d0, A_ao, LDA_ao )
+  
+  deallocate(tmp_1, tmp_2)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
 BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
 
   BEGIN_DOC
@@ -175,3 +221,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 6fa6a4c7..72f820ec 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -40,6 +40,47 @@
  END_PROVIDER
 
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) 
+  enddo
+ enddo
+
+ END_PROVIDER
+
+
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..39ea0cdf
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,155 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_overlap_abs_grid(j,i) += contrib 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
+!
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    do m = 1, 3
+     ao_prod_center(m,j,i) += contrib * r(m)
+    enddo
+   enddo
+  enddo
+ enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+!
+ END_DOC
+ ao_prod_abs_r = 0.d0
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    contrib_x2 = 0.d0
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
+    enddo
+    contrib_x2 = dsqrt(contrib_x2)
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = distance between the center of |phi_i(r) phi_j(r)| and the grid point r(ipoint)
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    distance = 0.d0
+    do m = 1, 3
+     distance += (ao_prod_center(m,j,i) - r(m))*(ao_prod_center(m,j,i) - r(m))
+    enddo
+    distance = dsqrt(distance)
+    ao_prod_dist_grid(j,i,ipoint)  = distance
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f
index d7d8fa7d..cb698fbb 100644
--- a/src/hartree_fock/fock_matrix_hf.irp.f
+++ b/src/hartree_fock/fock_matrix_hf.irp.f
@@ -1,12 +1,27 @@
+! ---
 
  BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha, (ao_num, ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta ,  (ao_num, ao_num) ]
- use map_module
- implicit none
+&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta , (ao_num, ao_num) ]
+
  BEGIN_DOC
- ! Alpha and Beta Fock matrices in AO basis set
+ !
+ ! 2-e part of alpha and beta Fock matrices (F^{a} & F^{b}) in AO basis set
+ ! 
+ ! F^{a} = h + G^{a}
+ ! F^{b} = h + G^{b}
+ ! 
+ ! where : 
+ !         F^{a} = J^{a} + J^{b} - K^{a} ==> G_{ij}^{a} = \sum_{k,l} P_{kl} (kl|ij) - P_{kl}^{a} (ki|lj)
+ !         F^{b} = J^{a} + J^{b} - K^{b} ==> G_{ij}^{b} = \sum_{k,l} P_{kl} (kl|ij) - P_{kl}^{b} (ki|lj)
+ ! 
+ ! and P_{kl} = P_{kl}^{a} + P_{kl}^{b} 
+ !
  END_DOC
 
+ use map_module
+
+ implicit none
+
  integer                        :: i,j,k,l,k1,r,s
  integer                        :: i0,j0,k0,l0
  integer*8                      :: p,q
@@ -153,6 +168,8 @@
 
 END_PROVIDER
 
+! ---
+
  BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]
 &BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta,  (ao_num, ao_num) ]
  implicit none
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
index 3226073d..8cbf9dd0 100644
--- a/src/hartree_fock/scf.irp.f
+++ b/src/hartree_fock/scf.irp.f
@@ -68,20 +68,29 @@ subroutine create_guess
   endif
 end
 
-subroutine run
+! ---
+
+subroutine run()
 
   BEGIN_DOC
-!   Run SCF calculation
+  ! Run SCF calculation
   END_DOC
 
   use bitmasks
   implicit none
 
-  integer                        :: i_it, i, j, k
-
   mo_label = 'Orthonormalized'
 
-  call Roothaan_Hall_SCF
+  PROVIDE scf_algorithm
+
+  if(scf_algorithm .eq. "DIIS") then
+    call Roothaan_Hall_SCF()
+  elseif(scf_algorithm .eq. "Simple") then
+    call Roothaan_Hall_SCF_Simple()
+  else
+    print *, scf_algorithm, ' not implemented yet'
+  endif
+
   call ezfio_set_hartree_fock_energy(SCF_energy)
 
 end
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
index bb585f63..5e7ef7e9 100644
--- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -17,7 +17,7 @@ program debug_integ_jmu_modif
 
   PROVIDE mu_erf j1b_pen
 
-  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_u_cst_mu_j1b()
 !  call test_v_ij_erf_rk_cst_mu_j1b()
 !  call test_x_v_ij_erf_rk_cst_mu_j1b()
 !  call test_int2_u2_j1b2()
@@ -31,6 +31,9 @@ program debug_integ_jmu_modif
 !  call test_u12_grad1_u12_j1b_grad1_j1b()
 !  !call test_gradu_squared_u_ij_mu()
 
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
 end
 
 ! ---
@@ -303,7 +306,7 @@ subroutine test_int2_grad1_u12_ao()
 
         call num_int2_grad1_u12_ao(i, j, ipoint, integ)
 
-        i_exc  = int2_grad1_u12_ao(1,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,1) 
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -315,7 +318,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(2,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -327,7 +330,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(3,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -379,7 +382,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
 
         call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
 
-        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(1,i,j,ipoint) 
+        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,1)
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -391,7 +394,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(2,i,j,ipoint) 
+        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -403,7 +406,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(3,i,j,ipoint) 
+        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -595,7 +598,183 @@ subroutine test_u12_grad1_u12_j1b_grad1_j1b()
   print*, ' normalz = ', normalz
 
   return
-end subroutine test_u12_grad1_u12_j1b_grad1_j1b,
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
 
 ! ---
 
+subroutine test_vect_overlap_gauss_r12_ao()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! ---
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! ---
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..81a8fe71 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -70,9 +70,9 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi
 
           gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)            &
                                             + tmp2 * int2_u2_j1b2             (i,j,ipoint)            &
-                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
         enddo
       enddo
     enddo
@@ -104,11 +104,11 @@ END_PROVIDER
 
 ! ---
 
-!BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 !
 !  BEGIN_DOC
 !  !
-!  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+!  ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
 !  !
 !  END_DOC
 !
@@ -142,8 +142,8 @@ END_PROVIDER
 !    do l = 1, ao_num
 !      do i = 1, ao_num
 !        do k = 1, ao_num
-!          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
-!          !write(11,*) tc_grad_square_ao(k,i,l,j)
+!          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+!          !write(11,*) tc_grad_square_ao_loop(k,i,l,j)
 !        enddo
 !      enddo
 !    enddo
@@ -155,19 +155,23 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
   !
   END_DOC
 
   implicit none
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_square_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
   allocate(bc_mat(ao_num,ao_num,ao_num,ao_num))
@@ -177,10 +181,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     weight1 = final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      !ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      ao_i_r = weight1 * aos_in_r_array(i,ipoint)
 
       do k = 1, ao_num
-        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        !ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
 
         do j = 1, ao_num
           do l = 1, ao_num
@@ -196,7 +202,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     do l = 1, ao_num
       do i = 1, ao_num
         do k = 1, ao_num
-          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
         enddo
       enddo
     enddo
@@ -205,6 +211,9 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
   deallocate(ac_mat)
   deallocate(bc_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
@@ -328,9 +337,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num,
 
         tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
 
-        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
@@ -342,3 +351,86 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) & 
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) & 
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..180c9588
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,221 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test ...'
+  call wall_time(time0)
+
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) & 
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) & 
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = tmp1 * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+
+  provide int2_u_grad1u_x_j1b2_test
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  provide int2_grad1u2_grad2u2_j1b2_test
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
index f3b68f43..a515e0b8 100644
--- a/src/non_h_ints_mu/j12_nucl_utils.irp.f
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -237,6 +237,23 @@ end function j12_mu
 
 ! ---
 
+double precision function j12_mu_r12(r12)
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: mu_r12
+
+  mu_r12 = mu_erf * r12
+
+  j12_mu_r12 = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu_r12
+
+! ---
+
 double precision function j12_mu_gauss(r1, r2)
 
   implicit none
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..9aef436f 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -1,22 +1,21 @@
-  
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
   !
   ! where r1 = r(ipoint)
   !
   ! if J(r1,r2) = u12:
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
   !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
   !
   ! if J(r1,r2) = u12 x v1 x v2
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  ! int2_grad1_u12_ao(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
   !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
   !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
   !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
@@ -25,6 +24,95 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
   !
   END_DOC
 
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1)
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2)
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(i,j,ipoint,:)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12              v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
   implicit none
   integer          :: ipoint, i, j
   double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
@@ -49,32 +137,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
           tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
           tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
 
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
         enddo
       enddo
     enddo
 
   else
 
-    do ipoint = 1, n_points_final_grid
-      x = final_grid_points(1,ipoint)
-      y = final_grid_points(2,ipoint)
-      z = final_grid_points(3,ipoint)
-
-      do j = 1, ao_num
-        do i = 1, ao_num
-          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
-
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
-        enddo
-      enddo
-    enddo
-
-    int2_grad1_u12_ao *= 0.5d0
+    int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao
 
   endif
 
@@ -82,11 +154,11 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
   !
   ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
   !
@@ -98,33 +170,48 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_and_lapl_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
+  ! ---
+
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
-      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
-      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
-      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      !ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      ao_i_r  = weight1 * aos_in_r_array     (i,ipoint)
+      ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3)
 
       do k = 1, ao_num
-        ao_k_r = aos_in_r_array_transp(ipoint,k)
+        !ao_k_r = aos_in_r_array_transp(ipoint,k)
+        ao_k_r = aos_in_r_array(k,ipoint)
 
-        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
-        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
-        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) 
 
         do j = 1, ao_num
           do l = 1, ao_num
 
-            contrib_x = int2_grad1_u12_ao(1,l,j,ipoint) * tmp_x 
-            contrib_y = int2_grad1_u12_ao(2,l,j,ipoint) * tmp_y 
-            contrib_z = int2_grad1_u12_ao(3,l,j,ipoint) * tmp_z 
+            contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x 
+            contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y 
+            contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z 
 
             ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
           enddo
@@ -132,7 +219,122 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+
+  ! ---
+
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
  
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao ...'
+  call wall_time(time0)
+
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) 
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) 
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0              &
+              , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+  deallocate(b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
   do j = 1, ao_num
     do l = 1, ao_num
       do i = 1, ao_num
@@ -142,10 +344,16 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
   deallocate(ac_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
 
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..4d85e061
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,174 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1)
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2)
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao_test *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_test ...'
+  call wall_time(time0)
+
+  provide int2_grad1_u12_ao_test 
+ 
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                   &
+              , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num)
+
+  enddo
+  deallocate(b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 979296d1..81747553 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -7,17 +7,22 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao
   integer          :: i, j, k, l
   double precision :: wall1, wall0
 
+  print *, ' providing ao_tc_int_chemist ...'
   call wall_time(wall0)
-
-  do j = 1, ao_num
-    do l = 1, ao_num
-      do i = 1, ao_num
-        do k = 1, ao_num
-          ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
-        enddo
-      enddo
-    enddo
-  enddo
+  
+  if(test_cycle_tc)then
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
 
   call wall_time(wall1)
   print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
@@ -26,6 +31,32 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist_test ...'
+  call wall_time(wall0)
+
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
 
   BEGIN_DOC
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
index df1eb71d..78fddf54 100644
--- a/src/non_hermit_dav/biorthog.irp.f
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -283,16 +283,16 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   ! -------------------------------------------------------------------------------------
   !
 
-  print *, ' '
-  print *, ' Computing the left/right eigenvectors ...'
-  print *, ' '
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
 
-  allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) 
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
   
-  print *, ' fock matrix'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') A(i,:)
-  enddo
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
 
   !thr_cut = 1.d-15
   !call cancel_small_elmts(A, n, thr_cut)
@@ -301,11 +301,11 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
   !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
 
-  print *, ' '
-  print *, ' eigenvalues'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') WR(i), WI(i)
-  enddo
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
   !print *, ' right eigenvect bef' 
   !do i = 1, n
   !  write(*, '(1000(F16.10,X))') VR(:,i)
@@ -328,9 +328,10 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   !                  track & sort the real eigenvalues 
 
   n_good = 0
-  thr    = 1.d-3
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
   do i = 1, n
-    print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
     if(dabs(WI(i)) .lt. thr) then
       n_good += 1
     else
@@ -404,23 +405,24 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
 
-    print *, ' lapack vectors are normalized and bi-orthogonalized'
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
     deallocate(S)
     return
 
-  elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
 
-    print *, ' lapack vectors are not normalized but bi-orthogonalized'
-    call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
 
-    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
 
-    deallocate(S)
-    return
+  !  deallocate(S)
+  !  return
 
   else
 
-    print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
 
     ! ---
 
@@ -442,8 +444,8 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
     endif
     call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
 
-    !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
-    !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec)
+    !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
 
     ! ---
 
@@ -609,7 +611,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if(accu_nd .lt. 1d-8) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     print *, ' L & T bi-orthogonality: ok'
@@ -621,7 +623,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
     print *, ' L & T bi-orthogonality: not imposed yet'
     print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
+    call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
@@ -631,7 +633,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
   return
 
-end 
+end subroutine non_hrmt_bieig_random_diag
 
 ! ---
 
@@ -959,7 +961,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if( accu_nd .lt. 1d-8 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     !print *, ' L & T bi-orthogonality: ok'
@@ -971,7 +973,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
     !print *, ' L & T bi-orthogonality: not imposed yet'
     !print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n, leigvec, reigvec)
+    call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
index 53c62ce8..0d652af4 100644
--- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -930,7 +930,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -973,7 +973,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize weighted vectors'
+  !print *, ' apply SVD to orthogonalize & normalize weighted vectors'
 
   ! ---
 
@@ -1097,10 +1097,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1160,10 +1160,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize vectors'
+  !print *, ' apply SVD to orthogonalize & normalize vectors'
 
   ! ---
 
@@ -1196,10 +1196,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1224,6 +1224,7 @@ subroutine impose_orthog_svd(n, m, C)
   if(num_linear_dependencies > 0) then
     write(*,*) ' linear dependencies = ', num_linear_dependencies
     write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
     stop
   endif
 
@@ -1256,10 +1257,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1296,10 +1297,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1358,10 +1359,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
   deallocate(S)
 
 end subroutine impose_orthog_svd_overlap
@@ -1528,11 +1529,11 @@ subroutine impose_orthog_degen_eigvec(n, e0, C0)
   enddo
 
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -1677,7 +1678,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
   double precision                :: accu_d, accu_nd, s_tmp
   double precision, allocatable   :: S(:,:)
 
-  print *, ' check bi-orthonormality'
+  !print *, ' check bi-orthonormality'
 
   ! ---
 
@@ -1714,15 +1715,19 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print*, '    diag acc: ', accu_d
-  print*, ' nondiag acc: ', accu_nd
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
 
   ! ---
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
 
     do i = 1, m
-      print *, i, S(i,i)
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
       if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
         s_tmp = 1.d0 / dsqrt(S(i,i))
         do j = 1, n
@@ -1757,8 +1762,8 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print *, '    diag acc: ', accu_d
-  print *, ' nondiag acc: ', accu_nd
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
 
   deallocate(S)
 
@@ -1801,10 +1806,10 @@ subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1852,17 +1857,18 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   integer                       :: i, j
   double precision, allocatable :: SS(:,:)
 
-  print *, ' check bi-orthogonality'
+  !print *, ' check bi-orthogonality'
 
   ! ---
 
   call dgemm( 'T', 'N', m, m, n, 1.d0          &
             , Vl, size(Vl, 1), Vr, size(Vr, 1) &
             , 0.d0, S, size(S, 1) )
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1877,12 +1883,12 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
 
-  print *, ' accu_nd = ', accu_nd
-  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+  !print *, ' accu_nd = ', accu_nd
+  !print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
 
   ! ---
 
-  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+  if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then
     print *, ' non bi-orthogonal vectors !'
     print *, ' accu_nd = ', accu_nd
     print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
@@ -1912,12 +1918,12 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
             , V, size(V, 1), V, size(V, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ''
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
-  print *, ''
+  !print *, ''
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+  !print *, ''
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1981,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i), e0(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2181,11 +2187,11 @@ subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0,
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2414,10 +2420,10 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2489,10 +2495,11 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   ! ---
@@ -2806,10 +2813,10 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F25.16,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2886,10 +2893,11 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap aft SVD with overlap: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   return
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
index 07ac5917..4dea5f66 100644
--- a/src/non_hermit_dav/new_routines.irp.f
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -132,9 +132,9 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -149,14 +149,14 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
     deallocate(S_nh_inv_half)
     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-     if( accu_nd .lt. 1d-10 ) then
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -200,10 +200,10 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -354,14 +354,14 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
     print *, ' bi-orthogonality: not imposed yet'
-    if(complex_root)then 
+    if(complex_root) then 
      print *, ' '
      print *, ' '
      print *, ' orthog between degen eigenvect' 
@@ -369,9 +369,9 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
      call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
      call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) 
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S,  thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) 
 
-     if( accu_nd .lt. 1d-10 ) then
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ', accu_nd
@@ -387,8 +387,8 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      print*,'S^{-1/2} exists !!'
      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -431,10 +431,10 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -472,6 +472,7 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   double precision :: accu,thr_cut
   double precision, allocatable :: S_nh_inv_half(:,:)
   logical :: complex_root
+  double precision :: thr_norm=1d0
 
 
   thr_cut = 1.d-15
@@ -580,9 +581,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -593,9 +594,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
     print *, ' '
     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
     call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
     print*,'accu_nd = ',accu_nd
-    if( accu_nd .lt. 1d-10 ) then
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -608,8 +609,8 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
      endif
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -651,11 +652,11 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   print*,'Checking for final reigvec/leigvec'
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
-  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
diff --git a/src/scf_utils/diagonalize_fock.irp.f b/src/scf_utils/diagonalize_fock.irp.f
index 5188581a..008344f2 100644
--- a/src/scf_utils/diagonalize_fock.irp.f
+++ b/src/scf_utils/diagonalize_fock.irp.f
@@ -20,6 +20,12 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
      enddo
    enddo
 
+  !print *, ' Fock_matrix_MO :'
+  !do i = 1, mo_num
+  !  write(*, '(100(f15.7, 2x))') (Fock_matrix_MO(j,i), j = 1, mo_num)
+  !enddo
+
+
    if(frozen_orb_scf)then
      integer                        :: iorb,jorb
      do i = 1, n_core_orb
@@ -57,7 +63,6 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    do i = elec_beta_num+1, elec_alpha_num
      F(i,i) += 0.5d0*level_shift
    enddo
-
    do i = elec_alpha_num+1, mo_num
      F(i,i) += level_shift
    enddo
@@ -90,6 +95,10 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    call dsyevd( 'V', 'U', mo_num, F,                             &
        size(F,1), diag, work, lwork, iwork, liwork, info)
    deallocate(iwork)
+  !print*, ' Fock eigval:'
+  !do i = 1, mo_num
+  !  print *, diag(i)
+  !enddo
 
 
    if (info /= 0) then
diff --git a/src/scf_utils/diis.irp.f b/src/scf_utils/diis.irp.f
index 713de1b3..63a847ce 100644
--- a/src/scf_utils/diis.irp.f
+++ b/src/scf_utils/diis.irp.f
@@ -1,3 +1,5 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
  implicit none
  BEGIN_DOC
@@ -12,6 +14,8 @@ BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
   implicit none
   BEGIN_DOC
@@ -60,6 +64,8 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
   implicit none
   begin_doc
@@ -69,6 +75,7 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
      FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1))
 END_PROVIDER
 
+! ---
 
  BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ]
 &BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ]
@@ -137,3 +144,175 @@ END_PROVIDER
 
 END_PROVIDER
 
+! ---
+
+!BEGIN_PROVIDER [double precision, error_diis_Fmo, (ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! error_diis_Fmo = (S x C) x [F_mo x \eta_occ - \eta_occ x F_mo] x (S x C).T
+!  !
+!  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j
+!  double precision, allocatable :: tmp(:,:)
+!
+!  provide Fock_matrix_mo
+!
+!  allocate(tmp(mo_num,mo_num))
+!  tmp = 0.d0
+!
+!  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+!  do j = 1, elec_alpha_num
+!    do i = elec_alpha_num + 1, mo_num
+!      tmp(i,j) = Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_alpha_num + 1, mo_num
+!    do i = 1, elec_alpha_num
+!      tmp(i,j) = -Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+!  do j = 1, elec_beta_num
+!    do i = elec_beta_num + 1, mo_num
+!      tmp(i,j) += Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_beta_num + 1, mo_num
+!    do i = 1, elec_beta_num
+!      tmp(i,j) -= Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  call mo_to_ao(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+!  
+!  deallocate(tmp)
+!
+!END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, error_diis_Fmo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! error_diis_Fmo = [F_mo x \eta_occ - \eta_occ x F_mo]
+  !
+  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j
+  double precision, allocatable :: tmp(:,:)
+
+  provide Fock_matrix_mo
+
+  error_diis_Fmo = 0.d0
+
+  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+  do j = 1, elec_alpha_num
+    do i = elec_alpha_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_alpha_num + 1, mo_num
+    do i = 1, elec_alpha_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+  do j = 1, elec_beta_num
+    do i = elec_beta_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_beta_num + 1, mo_num
+    do i = 1, elec_beta_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  !allocate(tmp(ao_num,ao_num))
+  !call mo_to_ao(error_diis_Fmo, size(error_diis_Fmo, 1), tmp, size(tmp, 1))
+  !call ao_to_mo(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+  !deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_a, (AO_num, AO_num)]
+
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                                 &
+            , Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1), SCF_density_matrix_ao_alpha, size(SCF_Density_Matrix_AO_alpha, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                             &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                        &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_b, (AO_num, AO_num)]
+
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                             &
+            , Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1), SCF_density_matrix_ao_beta, size(SCF_Density_Matrix_AO_beta, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                           &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                      &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_a, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1), FPS_SPF_Matrix_MO_a, size(FPS_SPF_Matrix_MO_a, 1))
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_b, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1), FPS_SPF_Matrix_MO_b, size(FPS_SPF_Matrix_MO_b, 1))
+END_PROVIDER
+
+! ---
+
diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f
index 539f1eb3..baefcd6c 100644
--- a/src/scf_utils/fock_matrix.irp.f
+++ b/src/scf_utils/fock_matrix.irp.f
@@ -267,3 +267,5 @@ BEGIN_PROVIDER [ double precision, SCF_energy ]
 
 END_PROVIDER
 
+! ---
+
diff --git a/src/scf_utils/rh_scf_simple.irp.f b/src/scf_utils/rh_scf_simple.irp.f
new file mode 100644
index 00000000..cd7ba66f
--- /dev/null
+++ b/src/scf_utils/rh_scf_simple.irp.f
@@ -0,0 +1,129 @@
+subroutine Roothaan_Hall_SCF_Simple
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  integer                        :: iteration_SCF, dim_DIIS
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS
+
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate(mo_coef_save(ao_num,mo_num))
+
+
+  dim_DIIS     = 0
+  mo_coef_save = 0.d0
+
+  call write_time(6)
+
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  max_error_DIIS      = 1.d0
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    MO_coef = eigenvectors_Fock_matrix_MO
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+    TOUCH MO_coef
+
+!   Calculate error vectors
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy
+
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+
+    !double precision :: level_shift_save
+    !level_shift_save = level_shift
+    !mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    !do while (Delta_energy_SCF > 0.d0)
+    !  mo_coef(1:ao_num,1:mo_num) = mo_coef_save
+    !  if (level_shift <= .1d0) then
+    !    level_shift = 1.d0
+    !  else
+    !    level_shift = level_shift * 3.0d0
+    !  endif
+    !  TOUCH mo_coef level_shift
+    !  mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num)
+    !  if(frozen_orb_scf)then
+    !    call reorder_core_orb
+    !    call initialize_mo_coef_begin_iteration
+    !  endif
+    !  TOUCH mo_coef
+    !  Delta_energy_SCF = SCF_energy - energy_SCF_previous
+    !  energy_SCF = SCF_energy
+    !  if (level_shift-level_shift_save > 40.d0) then
+    !    level_shift = level_shift_save * 4.d0
+    !    SOFT_TOUCH level_shift
+    !    exit
+    !  endif
+    !enddo
+    !level_shift = level_shift * 0.5d0
+    !SOFT_TOUCH level_shift
+
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+
+    if(Delta_energy_SCF < 0.d0) then
+      call save_mos()
+    endif
+    if(qp_stop()) exit
+
+  enddo
+
+  if (iteration_SCF < n_it_SCF_max) then
+    mo_label = 'Canonical'
+  endif
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      size(Fock_matrix_mo,2),mo_label,1,.true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 2c35fe0d..9ec61ced 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -29,11 +29,11 @@ END_DOC
 
   call write_time(6)
 
-  print*,'Energy of the guess = ',SCF_energy
+  print*,'energy of the guess = ',SCF_energy
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
-    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
 
@@ -66,7 +66,8 @@ END_DOC
 
     dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
 
-    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
+    if( (scf_algorithm == 'DIIS') .and. (dabs(Delta_energy_SCF) > 1.d-6))  then
+    !if(scf_algorithm == 'DIIS') then
 
       ! Store Fock and error matrices at each iteration
       index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
@@ -85,10 +86,9 @@ END_DOC
           iteration_SCF,dim_DIIS                                       &
           )
 
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
-
     endif
 
     MO_coef = eigenvectors_Fock_matrix_MO
@@ -99,18 +99,14 @@ END_DOC
 
     TOUCH MO_coef
 
-!   Calculate error vectors
-
-    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
-
 !   SCF energy
 
     energy_SCF = SCF_energy
-    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
-    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then
       Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
     endif
 
@@ -131,19 +127,24 @@ END_DOC
         call initialize_mo_coef_begin_iteration
       endif
       TOUCH mo_coef
-      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
       energy_SCF = SCF_energy
       if (level_shift-level_shift_save > 40.d0) then
         level_shift = level_shift_save * 4.d0
         SOFT_TOUCH level_shift
         exit
       endif
+
       dim_DIIS=0
     enddo
+
     level_shift = level_shift * 0.5d0
     SOFT_TOUCH level_shift
     energy_SCF_previous = energy_SCF
 
+!   Calculate error vectors
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
 !   Print results at the end of each iteration
 
     write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
@@ -175,7 +176,7 @@ END_DOC
    call save_mos
   endif
 
-  call write_double(6, Energy_SCF, 'SCF energy')
+  call write_double(6, energy_SCF, 'SCF energy')
 
   call write_time(6)
 
diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
index 60201f5f..eb812401 100644
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
@@ -14,21 +14,36 @@ program save_bitcpsileft_for_qmcchem
 
   e_ref = 0.d0
   iunit = 13
-  open(unit=iunit,file=trim(ezfio_filename)//'/simulation/e_ref',action='write')
-  call ezfio_has_fci_energy_pt2(exists)
-
-  if(.not.exists) then
-    call ezfio_has_fci_energy(exists)
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
 
+    call ezfio_has_fci_energy_pt2(exists)
     if(.not.exists) then
-      call ezfio_has_tc_scf_bitc_energy(exists)
-      if(exists) then
-        call ezfio_get_tc_scf_bitc_energy(e_ref)
+
+      call ezfio_has_fci_energy(exists)
+      if(.not.exists) then
+
+        call ezfio_has_cisd_energy(exists)
+        if(.not.exists) then
+
+          call ezfio_has_tc_scf_bitc_energy(exists)
+          if(exists) then
+            call ezfio_get_tc_scf_bitc_energy(e_ref)
+          endif
+
+        else
+          call ezfio_get_cisd_energy(e_ref)
+        endif
+
+      else
+        call ezfio_get_fci_energy(e_ref)
       endif
+
+      else
+        call ezfio_get_fci_energy_pt2(e_ref)
     endif
 
-  endif
-  write(iunit,*) e_ref
+    write(iunit,*) e_ref
+
   close(iunit)
 
 end
diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f
new file mode 100644
index 00000000..291c52ef
--- /dev/null
+++ b/src/tc_bi_ortho/tc_som.irp.f
@@ -0,0 +1,70 @@
+! ---
+
+program tc_som
+
+  BEGIN_DOC
+  ! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+  print *, ' do not forget to do tc-scf first'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  read_wf = .true.
+  touch read_wf
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  implicit none
+  integer          :: i, i_HF, degree
+  double precision :: hmono_1, htwoe_1, hthree_1, htot_1
+  double precision :: hmono_2, htwoe_2, hthree_2, htot_2
+  double precision :: U_SOM
+
+  PROVIDE N_int N_det
+
+  do i = 1, N_det
+    call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int)
+    if(degree == 0) then
+      i_HF = i
+      exit
+    endif
+  enddo
+  print *, ' HF determinants:', i_HF
+  print *, '          N_det :', N_det
+
+  U_SOM = 0.d0 
+  do i = 1, N_det
+    if(i == i_HF) cycle
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    U_SOM += htot_1 * htot_2
+  enddo
+  U_SOM = 0.5d0 * U_SOM
+  print *, ' U_SOM = ', U_SOM
+  
+  return
+end subroutine main
+
+! ---
+
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index a49a5958..ebd43a7a 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -15,7 +15,8 @@ program test_tc_fock
   !call routine_2
 !  call routine_3()
 
- call test_3e
+! call test_3e
+ call routine_tot
 end
 
 ! ---
@@ -32,7 +33,7 @@ subroutine test_3e
  print*,'htot  = ',htot
  print*,''
  print*,''
- print*,'TC_one= ',TC_HF_one_electron_energy
+ print*,'TC_one= ',tc_hf_one_e_energy
  print*,'TC_two= ',TC_HF_two_e_energy
  print*,'TC_3e = ',diag_three_elem_hf
  print*,'TC_tot= ',TC_HF_energy
@@ -84,8 +85,8 @@ subroutine routine_3()
         print*, i, a
         stop
        endif
-       !print*, ' excited det'
-       !call debug_det(det_i, N_int)
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
  
        call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
@@ -116,3 +117,78 @@ subroutine routine_3()
 end subroutine routine_3
 
 ! ---
+subroutine routine_tot()
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+ 
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index e506d1cc..eb8fa8be 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -86,7 +86,7 @@ default: False
 type: Threshold
 doc: Threshold on the convergence of the Hartree Fock energy.
 interface: ezfio,provider,ocaml
-default: 1.e-10
+default: 1.e-12
 
 [n_it_tcscf_max]
 type: Strictly_positive_int
@@ -134,5 +134,53 @@ default: False
 type: integer
 doc: nb of Gaussians used to fit Jastrow fcts
 interface: ezfio,provider,ocaml
-default: 6
+default: 20
+
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: Simple 
+
+[im_thresh_tcscf]
+type: Threshold
+doc: Thresholds on the Imag part of energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_biorthog_diag]
+type: Threshold
+doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enouph to 1
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_biorthog_nondiag]
+type: Threshold
+doc: Threshold to determine if non-diagonal elements of L.T x R are close enouph to 0
+interface: ezfio,provider,ocaml
+default: 1.e-6
 
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
index 856b7382..726169d9 100644
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -1,3 +1,5 @@
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
@@ -9,32 +11,46 @@
 
   implicit none
   integer                       :: n_real_tc 
-  integer                       :: i, k, l
+  integer                       :: i, j, k, l
   double precision              :: accu_d, accu_nd, accu_tmp
-  double precision              :: thr_d, thr_nd
   double precision              :: norm
   double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
 
-  thr_d  = 1d-6
-  thr_nd = 1d-6
-
-  allocate( eigval_right_tmp(mo_num) )
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
 
   PROVIDE Fock_matrix_tc_mo_tot
 
-  call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo       & 
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
                      , n_real_tc, eigval_right_tmp )
+
   !if(max_ov_tc_scf)then
-  ! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
+  ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo                          & 
   !                    , n_real_tc, eigval_right_tmp )
   !else 
-  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot &
+  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
   !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
   !                    , n_real_tc, eigval_right_tmp )
   !endif
 
+  deallocate(F_tmp)
+
+
 !  if(n_real_tc .ne. mo_num)then
 !   print*,'n_real_tc ne mo_num ! ',n_real_tc
 !   stop
@@ -42,9 +58,12 @@
 
   eigval_fock_tc_mo = eigval_right_tmp
 !  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
-!  do i = 1, mo_num
+!  do i = 1, elec_alpha_num
 !    print*, i, eigval_fock_tc_mo(i)
 !  enddo
+!  do i = elec_alpha_num+1, mo_num 
+!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
+!  enddo
 !  deallocate( eigval_right_tmp )
 
   ! L.T x R 
@@ -53,6 +72,8 @@
             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
             , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
 
+  ! ---
+
   accu_d  = 0.d0
   accu_nd = 0.d0
   do i = 1, mo_num
@@ -63,45 +84,80 @@
       else
         accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
         accu_nd += accu_tmp * accu_tmp
-        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then
+        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
          print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
         endif
       endif
     enddo 
   enddo
-  accu_nd = dsqrt(accu_nd)/accu_d
-
-  if(accu_nd .gt. thr_nd) then
+  accu_nd = dsqrt(accu_nd) / accu_d
+  if(accu_nd .gt. thresh_biorthog_nondiag) then
     print *, ' bi-orthog failed'
-    print*,'accu_nd MO = ', accu_nd, thr_nd
-    print*,'overlap_fock_tc_eigvec_mo = '
+    print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+    print *, ' overlap_fock_tc_eigvec_mo = '
     do i = 1, mo_num
       write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
     enddo
-   stop
+    stop
   endif
 
-  if( dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d ) then
-    print *, 'mo_num     = ', mo_num 
-    print *, 'accu_d  MO = ', accu_d, thr_d
-    print *, 'normalizing vectors ...'
+  ! ---
+
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thresh_biorthog_diag
+    print *, ' normalizing vectors ...'
     do i = 1, mo_num
       norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
-      if(norm .gt. thr_d) then
+      if(norm .gt. thresh_biorthog_diag) then
         do k = 1, mo_num
           fock_tc_reigvec_mo(k,i) *= 1.d0/norm
           fock_tc_leigvec_mo(k,i) *= 1.d0/norm
         enddo
       endif
     enddo
+
     call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
               , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
               , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
               , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d
+    if(accu_nd .gt. thresh_biorthog_diag) then
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
   endif
  
+  ! ---
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
@@ -117,6 +173,7 @@ END_PROVIDER
   double precision              :: accu, accu_d
   double precision, allocatable :: tmp(:,:)
 
+  PROVIDE mo_l_coef mo_r_coef
 
 !  ! MO_R x R
    call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..ff1077f5
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,186 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ]
+ 
+  implicit none
+ 
+  if(threshold_DIIS_TCSCF == 0.d0) then
+    threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf)
+  else
+    threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF
+  endif
+  ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_alpha = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_alpha, size(Q_alpha, 1) )
+
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_beta = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_beta, size(Q_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  ! 
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |     
+  !                        | 0   otherwise
+  !
+  ! the diis error is defines as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      at convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0  
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+
+  if(elec_alpha_num == elec_beta_num) then
+    Q_matrix = Q_alpha + Q_alpha
+  else
+    Q_matrix = Q_alpha + Q_beta
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  allocate(tmp(ao_num,ao_num))
+
+  ! F x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                             &
+            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0             &
+            , tmp, size(tmp, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  ! S x Q
+  tmp = 0.d0
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
+            , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S - S x Q x F
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0                                  &
+            , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+
+  implicit none
+
+  call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) &
+                        , FQS_SQF_mo, size(FQS_SQF_mo, 1) )
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+~                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..fccfd837
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,405 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+   
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index 6b1c1d77..7403049c 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -1,63 +1,147 @@
 
 ! ---
 
- BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
- BEGIN_DOC
-! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
-!
-! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
- END_DOC
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
   implicit none
   integer          :: i, j, k, l
   double precision :: density, density_a, density_b
+  double precision :: t0, t1
 
-  two_e_tc_non_hermit_integral_alpha = 0.d0
-  two_e_tc_non_hermit_integral_beta  = 0.d0
+  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
+  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
 
-  !! TODO :: parallelization properly done
   do i = 1, ao_num
     do k = 1, ao_num
-!!$OMP PARALLEL                  &
-!!$OMP DEFAULT (NONE)            &
-!!$OMP PRIVATE (j,l,density_a,density_b,density) & 
-!!$OMP SHARED (i,k,ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,ao_non_hermit_term_chemist) & 
-!!$OMP SHARED (two_e_tc_non_hermit_integral_alpha,two_e_tc_non_hermit_integral_beta)
-!!$OMP DO SCHEDULE (dynamic)
       do j = 1, ao_num
         do l = 1, ao_num
 
           density_a = TCSCF_density_matrix_ao_alpha(l,j)
           density_b = TCSCF_density_matrix_ao_beta (l,j)
-          density   = density_a + density_b                      
+          density   = density_a + density_b
+
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !                                         rho_a(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !                                         rho_b(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+          two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !                                         rho_b(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
         enddo
       enddo
-!!$OMP END DO
-!!$OMP END PARALLEL
     enddo
   enddo
 
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  !print*, ' providing two_e_tc_non_hermit_integral ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_alpha = 0.d0
+  two_e_tc_non_hermit_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                        &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)              &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, &
+ !$OMP         two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta)
+
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_tc_tot(k,i,l,j)
+          I_kjli = ao_two_e_tc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0
+
 END_PROVIDER 
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
-  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot &
-                          + two_e_tc_non_hermit_integral_alpha 
+
+  implicit none
+
+  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha 
 
 END_PROVIDER 
 
@@ -66,102 +150,149 @@ END_PROVIDER
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
 
   BEGIN_DOC
- ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
+
   implicit none
 
-  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot &
-                         + two_e_tc_non_hermit_integral_beta 
+  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta 
 
 END_PROVIDER 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
-!  END_DOC
-!  Fock_matrix_tc_ao_tot = 0.5d0 * (Fock_matrix_tc_ao_alpha + Fock_matrix_tc_ao_beta)
-!END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
-                         , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_alpha
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_a
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                  , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
   endif
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
-                         , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_beta
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_b
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                  , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
   endif
+
 END_PROVIDER
 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
-!  END_DOC
-!  Fock_matrix_tc_mo_tot = 0.5d0 * (Fock_matrix_tc_mo_alpha + Fock_matrix_tc_mo_beta)
-!  if(three_body_h_tc) then
-!    Fock_matrix_tc_mo_tot += fock_3_mat
-!  endif
-!  !call restore_symmetry(mo_num, mo_num, Fock_matrix_tc_mo_tot, mo_num, 1.d-10)
-!END_PROVIDER 
-
 ! ---
 
  BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit]
- implicit none
+
+  implicit none
   integer :: i, k
-  grad_non_hermit_left = 0.d0
+
+  grad_non_hermit_left  = 0.d0
   grad_non_hermit_right = 0.d0
+
   do i = 1, elec_beta_num ! doc --> SOMO
     do k = elec_beta_num+1, elec_alpha_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      !grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = 1, elec_beta_num ! doc --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
- grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
+  !grad_non_hermit = dsqrt(grad_non_hermit_left) + dsqrt(grad_non_hermit_right)
+  grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
 END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+
+  implicit none
+
+  call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) &
+                        , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) )
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
index a99c7698..2f33cd17 100644
--- a/src/tc_scf/fock_tc_mo_tot.irp.f
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -73,6 +73,29 @@
              + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
        enddo
      enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
 
    endif
 
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index f73a5049..424eeffd 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -70,52 +70,76 @@ subroutine give_fock_ia_three_e_total(i,a,contrib)
 
 end
 
+! ---
+
 BEGIN_PROVIDER [double precision, diag_three_elem_hf]
- implicit none
- integer :: i,j,k,ipoint,mm
- double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231
- print*,'providing diag_three_elem_hf'
- if(.not.three_body_h_tc)then
-  diag_three_elem_hf = 0.d0
- else
-  if(.not.bi_ortho)then
-   one_third = 1.d0/3.d0
-   two_third = 2.d0/3.d0
-   four_third = 4.d0/3.d0
-   diag_three_elem_hf = 0.d0
-   do i = 1, elec_beta_num
-    do j = 1, elec_beta_num
-     do k = 1, elec_beta_num
-      call  give_integrals_3_body(k,j,i,j,i,k,exchange_int_231)   
-      diag_three_elem_hf += two_third * exchange_int_231
-     enddo
-    enddo
-   enddo
-   do mm = 1, 3
-    do ipoint = 1, n_points_final_grid
-     weight = final_weight_at_r_vector(ipoint)                                                                          
-     contrib   = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm)  & 
-                -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                                 & 
-                -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
-     contrib  *= four_third
-     contrib  += -two_third  * fock_3_rho_beta(ipoint)     * fock_3_w_kl_w_kl(ipoint,mm) & 
-                - four_third * fock_3_w_kk_sum(ipoint,mm)  * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
-     diag_three_elem_hf += weight * contrib
-    enddo
-   enddo
-   diag_three_elem_hf = - diag_three_elem_hf
+
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
   else
-   double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb
-   provide mo_l_coef mo_r_coef
-   call give_aaa_contrib(integral_aaa)
-   call give_aab_contrib(integral_aab)
-   call give_abb_contrib(integral_abb)
-   call give_bbb_contrib(integral_bbb)
-   diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+
+    if(.not. bi_ortho) then
+
+      ! ---
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
+
+    endif
+
   endif
- endif
+
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
  implicit none 
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
index b0345957..f73171a3 100644
--- a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -1,202 +1,286 @@
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_a_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_b_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_cs_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = 1, elec_beta_num
-!      call contrib_3e_sss(a,i,j,k,contrib_sss)
-!      call contrib_3e_soo(a,i,j,k,contrib_soo)
-!      call contrib_3e_sos(a,i,j,k,contrib_sos)
-!      contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- ! negative terms :: exchange contrib
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      new  = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) & 
-             -1.5d0 * exch_13_int - exch_23_int
-      fock_cs_3e_bi_orth(a,i) += new
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_cs_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+
+          !!call contrib_3e_sss(a,i,j,k,contrib_sss)
+          !!call contrib_3e_soo(a,i,j,k,contrib_soo)
+          !!call contrib_3e_sos(a,i,j,k,contrib_sos)
+          !!contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
+ 
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+
+          ! negative terms :: exchange contrib
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+
+          new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int
+
+          fock_cs_3e_bi_orth(a,i) += new
+        enddo
+      enddo
     enddo
-   enddo
-  
   enddo
- enddo
- fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
+ 
+  fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
 
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_a_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_beta_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) &
-      + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+          
+          fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
+  fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss
- fock_a_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = 1, elec_alpha_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call contrib_3e_sss(a,i,j,k,contrib_sss)
-      fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss
+
+  PROVIDE mo_l_coef mo_r_coef
+ 
+  fock_a_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+
+          fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-
-
-
+! ---
 
 BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
- double precision :: new
- fock_b_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+
+          fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
+  fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_soo
- fock_b_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_alpha_num
-      call contrib_3e_soo(a,i,j,k,contrib_soo)
-      fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+
+          fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-subroutine contrib_3e_sss(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
- call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- integral = direct_int + c_3_int + c_minus_3_int 
- ! negative terms :: exchange contrib
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
- call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
- integral += - exch_13_int - exch_23_int  - exch_12_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+  integral = direct_int + c_3_int + c_minus_3_int 
+
+  ! negative terms :: exchange contrib
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+  integral += - exch_13_int - exch_23_int  - exch_12_int 
+
+  integral = -integral
+
 end
 
+! ---
+
 subroutine contrib_3e_soo(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_23_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
- integral = direct_int - exch_23_int 
- integral = -integral
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+  integral = direct_int - exch_23_int 
+
+  integral = -integral
+
 end
 
-subroutine contrib_3e_sos(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
- integral = direct_int - exch_13_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+  integral = direct_int - exch_13_int 
+
+  integral = -integral
+
 end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f
new file mode 100644
index 00000000..306c78b3
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_diis.irp.f
@@ -0,0 +1,362 @@
+! ---
+
+subroutine rh_tcscf_diis()
+
+  implicit none
+
+  integer                       :: i, j, it
+  integer                       :: dim_DIIS, index_dim_DIIS
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta
+  double precision              :: tc_grad, g_save, g_delta, g_delta_th
+  double precision              :: level_shift_save, rate_th
+  double precision              :: t0, t1
+  double precision              :: er_DIIS, er_delta, er_save, er_delta_th
+  double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  it          = 0
+  e_save      = 0.d0
+  dim_DIIS    = 0
+  g_delta_th  = 1d0
+  er_delta_th = 1d0
+  rate_th     = 100.d0 !0.01d0 !0.2d0
+
+  allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num))
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF))
+  F_DIIS = 0.d0
+  E_DIIS = 0.d0
+
+  call write_time(6)
+
+  ! ---
+
+  PROVIDE level_shift_TCSCF
+  PROVIDE mo_l_coef mo_r_coef
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = TC_HF_energy
+  etc_1e  = TC_HF_one_e_energy
+  etc_2e  = TC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  tc_grad = grad_non_hermit
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+
+  e_save  = etc_tot
+  g_save  = tc_grad
+  er_save = er_DIIS
+
+  call wall_time(t1)
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  ! ---
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+
+  do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. threshold_DIIS_nonzero_TCSCF))
+
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! ---
+
+    if(dabs(e_delta) > 1.d-12) then
+
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao           (i,j)
+        enddo
+      enddo
+
+      call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS)
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+    endif
+
+    ! ---
+
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    !if((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) then
+    if((g_delta > rate_th * g_delta_th) .and. (it > 1)) then
+    !if((g_delta > 0.d0) .and. (it > 1)) then
+
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+    endif
+
+    ! ---
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
+    !do while((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1))
+    do while((g_delta > rate_th * g_delta_th) .and. (it > 1))
+      print *, ' big or bad step : ', g_delta, rate_th * g_delta_th
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      g_delta  = grad_non_hermit         -  g_save
+      er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+
+    ! ---
+
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad  = grad_non_hermit
+    er_DIIS  = maxval(abs(FQS_SQF_mo))
+    e_delta  = dabs(etc_tot - e_save)
+    g_delta  = tc_grad - g_save
+    er_delta = er_DIIS - er_save
+    
+    e_save           = etc_tot
+    g_save           = tc_grad
+    level_shift_save = level_shift_TCSCF
+    er_save          = er_DIIS
+
+    g_delta_th  = dabs(tc_grad) ! g_delta)
+    er_delta_th = dabs(er_DIIS) !er_delta)
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    if(g_delta .lt. 0.d0) then
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+  ! ---
+
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS)
+
+  call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  !
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1 
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: it, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: E_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond, ferr, berr
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) )
+
+  ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix and build the X matrix
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  deallocate(scratch)
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f
new file mode 100644
index 00000000..30798e3d
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_simple.irp.f
@@ -0,0 +1,129 @@
+! ---
+
+subroutine rh_tcscf_simple()
+
+  implicit none
+  integer                       :: i, j, it, dim_DIIS
+  double precision              :: t0, t1
+  double precision              :: e_save, e_delta, rho_delta
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad
+  double precision              :: er_DIIS
+  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
+
+  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  if(.not. bi_ortho) then
+   print *, ' grad_hermit = ', grad_hermit
+   call save_good_hermit_tc_eigvectors
+   TOUCH mo_coef 
+   call save_mos
+  endif
+
+  ! ---
+
+  if(bi_ortho) then
+
+    PROVIDE level_shift_tcscf
+    PROVIDE mo_l_coef mo_r_coef
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+            , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+
+    ! first iteration (HF orbitals)
+    call wall_time(t0)
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad = grad_non_hermit
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    do while(tc_grad .gt. dsqrt(thresh_tcscf))
+      call wall_time(t0)
+
+      it += 1
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        stop
+      endif
+
+      mo_l_coef = fock_tc_leigvec_ao
+      mo_r_coef = fock_tc_reigvec_ao
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      etc_tot = TC_HF_energy
+      etc_1e  = TC_HF_one_e_energy
+      etc_2e  = TC_HF_two_e_energy
+      etc_3e  = 0.d0
+      if(three_body_h_tc) then
+        etc_3e = diag_three_elem_hf
+      endif
+      tc_grad = grad_non_hermit
+      er_DIIS = maxval(abs(FQS_SQF_mo))
+      e_delta = dabs(etc_tot - e_save)
+      e_save  = etc_tot
+
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+      call wall_time(t1)
+      write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+        it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    enddo
+
+  else
+
+   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) )
+      print*,'grad_hermit = ',grad_hermit
+      it += 1
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
+      call save_good_hermit_tc_eigvectors
+      TOUCH mo_coef 
+      call save_mos
+    enddo
+
+  endif
+
+  print *, ' TCSCF Simple converged !'
+  call print_energy_and_mos()
+
+  deallocate(rho_old, rho_new)
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f
index d53991ed..fc4a7935 100644
--- a/src/tc_scf/rotate_tcscf_orbitals.irp.f
+++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f
@@ -260,14 +260,10 @@ subroutine fix_right_to_one()
   integer                       :: i, j, m, n, mm, tot_deg
   double precision              :: accu_d, accu_nd
   double precision              :: de_thr, ei, ej, de
-  double precision              :: thr_d, thr_nd
   integer,          allocatable :: deg_num(:)
   double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:)
   double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:)
 
-  thr_d  = 1d-7
-  thr_nd = 1d-7
-
   n = ao_num
   m = mo_num
 
@@ -340,7 +336,7 @@ subroutine fix_right_to_one()
       ! ---
 
       call impose_weighted_orthog_svd(n, mm, W, R)
-      call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L)
+      call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L)
 
       ! ---
 
@@ -353,7 +349,7 @@ subroutine fix_right_to_one()
     endif
   enddo
 
-  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.)
+  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.)
 
   deallocate(W, deg_num)
 
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
index 42925e41..596ae500 100644
--- a/src/tc_scf/routines_rotates.irp.f
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -116,7 +116,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
     print *, ' ------------------------------------'
     call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
 
-    print *, ' Overlap lef-right '
+    print *, ' Overlap left-right '
     call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
     do j = 1, n_degen
      write(*,'(100(F8.4,X))') stmp(:,j)
@@ -259,7 +259,7 @@ subroutine orthog_functions(m, n, coef, overlap)
   double precision, intent(in)    :: overlap(m,m)
   double precision, intent(inout) :: coef(m,n)
   double precision, allocatable   :: stmp(:,:)
-  integer                         :: j
+  integer                         :: j, k
 
   allocate(stmp(n,n))
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
@@ -270,7 +270,13 @@ subroutine orthog_functions(m, n, coef, overlap)
   call impose_orthog_svd_overlap(m, n, coef, overlap)
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
   do j = 1, n
-    coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    ! ---
+    ! TODO: MANU check ici
+    !coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    do k = 1, m
+      coef(k,j) *= 1.d0/dsqrt(stmp(j,j))
+    enddo
+    ! ---
   enddo
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
 
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 48cbbdc0..187750ff 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -1,7 +1,9 @@
+! ---
+
 program tc_scf
 
   BEGIN_DOC
-! TODO : Put the documentation of the program here
+  ! TODO : Put the documentation of the program here
   END_DOC
 
   implicit none
@@ -15,43 +17,51 @@ program tc_scf
 !  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-  !call create_guess
-  !call orthonormalize_mos
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  !call create_guess()
+  !call orthonormalize_mos()
+
+  PROVIDE tcscf_algorithm
+  if(tcscf_algorithm == 'DIIS') then
+    call rh_tcscf_diis()
+  elseif(tcscf_algorithm == 'Simple') then
+    call rh_tcscf_simple()
+  else
+    print *, ' not implemented yet', tcscf_algorithm
+    stop
+  endif
 
-  call routine_scf()
   call minimize_tc_orb_angles()
   call print_energy_and_mos()
 
-
 end
 
 ! ---
 
-subroutine create_guess
-
-  BEGIN_DOC
-  !   Create a MO guess if no MOs are present in the EZFIO directory
-  END_DOC
+subroutine create_guess()
 
   implicit none
   logical :: exists
 
   PROVIDE ezfio_filename
-  call ezfio_has_mo_basis_mo_coef(exists)
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
 
-  if (.not.exists) then
+  if(.not.exists) then
     mo_label = 'Guess'
-    if (mo_guess_type == "HCore") then
+    if(mo_guess_type == "HCore") then
       mo_coef = ao_ortho_lowdin_coef
       call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       TOUCH mo_coef
-      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,     &
-          size(mo_one_e_integrals,1),                            &
-          size(mo_one_e_integrals,2),                            &
-          mo_label,1,.false.)
-      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.)
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       SOFT_TOUCH mo_coef
-    else if (mo_guess_type == "Huckel") then
+    elseif (mo_guess_type == "Huckel") then
       call huckel_guess
     else
       print *,  'Unrecognized MO guess type : '//mo_guess_type
@@ -64,121 +74,3 @@ end subroutine create_guess
 
 ! ---
 
-subroutine routine_scf()
-
-  implicit none
-  integer                       :: i, j, it
-  double precision              :: e_save, e_delta, rho_delta
-  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
-
-  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
-
-  it = 0
-  print*,'iteration = ', it
-
-  !print*,'grad_hermit = ', grad_hermit
-  print*,'***'
-  print*,'TC HF total energy = ', TC_HF_energy
-  print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-  print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-  if(three_body_h_tc)then
-   print*,'TC HF 3 body       = ', diag_three_elem_hf
-  endif
-  print*,'***'
-  e_delta = 10.d0
-  e_save  = 0.d0 !TC_HF_energy
-  rho_delta = 10.d0
-
-
-  if(bi_ortho)then
-
-   mo_l_coef = fock_tc_leigvec_ao
-   mo_r_coef = fock_tc_reigvec_ao
-   rho_old   = TCSCF_bi_ort_dm_ao
-   call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-   call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-   TOUCH mo_l_coef mo_r_coef
-
-
-  else
-
-   print*,'grad_hermit = ',grad_hermit
-   call save_good_hermit_tc_eigvectors
-   TOUCH mo_coef 
-   call save_mos
-
-  endif
-
-  ! ---
-
-  if(bi_ortho) then
-
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. dsqrt(thresh_tcscf)) )
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. thresh_tcscf) )
-    !do while( it .lt. n_it_tcscf_max .and. (rho_delta .gt. thresh_tcscf) )
-    do while( it .lt. n_it_tcscf_max .and. (grad_non_hermit_right.gt. dsqrt(thresh_tcscf)) )
-
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy
-      if(three_body_h_tc)then
-       print*,'TC HF 3 body       = ', diag_three_elem_hf
-      endif
-      print*,'***'
-      e_delta = dabs( TC_HF_energy - e_save )
-      print*, 'it, delta E = ', it, e_delta
-      print*, 'it, gradient= ',grad_non_hermit_right
-      e_save    = TC_HF_energy
-      mo_l_coef = fock_tc_leigvec_ao
-      mo_r_coef = fock_tc_reigvec_ao
-
-      rho_new   = TCSCF_bi_ort_dm_ao
-      !print*, rho_new
-      rho_delta = 0.d0
-      do i = 1, ao_num 
-        do j = 1, ao_num 
-          rho_delta += dabs(rho_new(j,i) - rho_old(j,i))
-        enddo
-      enddo
-      print*, ' rho_delta =', rho_delta
-      rho_old = rho_new
-
-      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-      TOUCH mo_l_coef mo_r_coef
-
-      call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
-
-    enddo
-
-  else
-   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max )
-      print*,'grad_hermit = ',grad_hermit
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-      print*,'TC HF 3 body       = ', diag_three_elem_hf
-      print*,'***'
-      call save_good_hermit_tc_eigvectors
-      TOUCH mo_coef 
-      call save_mos
-
-    enddo
-
-  endif
-
-  print*,'Energy converged !'
-  call print_energy_and_mos
-
-  deallocate(rho_old, rho_new)
-
-end subroutine routine_scf
-
-! ---
-
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
index f6ae3e1f..4750199c 100644
--- a/src/tc_scf/tc_scf_dm.irp.f
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -1,25 +1,39 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_beta  = TCSCF_bi_ort_dm_ao_beta
- else
-  TCSCF_density_matrix_ao_beta  = SCF_density_matrix_ao_beta
- endif
+
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_alpha  = TCSCF_bi_ort_dm_ao_alpha
- else
-  TCSCF_density_matrix_ao_alpha  = SCF_density_matrix_ao_alpha
- endif
+
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
+
 END_PROVIDER 
 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
- implicit none
-  TCSCF_density_matrix_ao_tot  = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+  implicit none
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
 END_PROVIDER 
 
 
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
index aa2a16ff..611b8b4c 100644
--- a/src/tc_scf/tc_scf_energy.irp.f
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -1,6 +1,6 @@
 
  BEGIN_PROVIDER [ double precision, TC_HF_energy]
-&BEGIN_PROVIDER [ double precision, TC_HF_one_electron_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
 &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
 
   BEGIN_DOC
@@ -10,20 +10,22 @@
   implicit none
   integer :: i, j
 
+  PROVIDE mo_l_coef mo_r_coef
+
   TC_HF_energy = nuclear_repulsion
-  TC_HF_one_electron_energy = 0.d0
+  TC_HF_one_e_energy = 0.d0
   TC_HF_two_e_energy = 0.d0
 
   do j = 1, ao_num
     do i = 1, ao_num
       TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
-                                    + two_e_tc_non_hermit_integral_beta(i,j)  * TCSCF_density_matrix_ao_beta(i,j) )
-      TC_HF_one_electron_energy += ao_one_e_integrals_tc_tot(i,j) &
-                                       * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
     enddo
   enddo
 
-  TC_HF_energy += TC_HF_one_electron_energy + TC_HF_two_e_energy
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
   TC_HF_energy += diag_three_elem_hf
 
 END_PROVIDER
diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f
index 09a4a1b9..dde477c4 100644
--- a/src/tc_scf/tc_scf_utils.irp.f
+++ b/src/tc_scf/tc_scf_utils.irp.f
@@ -40,3 +40,4 @@ subroutine LTxSxR(n, m, L, S, R, C)
 end subroutine LTxR
 
 ! ---
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..6abeddf1
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,1008 @@
+program test_ints
+
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+
+  implicit none
+
+  print *, ' starting test_ints ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  my_extra_grid_becke = .True.
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
+! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+! call test_total_grad_square
+! call test_ao_tc_int_chemist
+! call test_grid_points_ao
+! call test_tc_scf
+ !call test_int_gauss
+
+  !call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_mo()
+
+  !call test_tc_grad_and_lapl_ao()
+  !call test_tc_grad_square_ao()
+
+  call test_two_e_tc_non_hermit_integral()
+
+end
+
+! ---
+
+subroutine test_tc_scf
+ implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
+! provide tc_grad_square_ao_test
+!  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
+! ---
+
+subroutine routine_test_j1b
+ implicit none
+ integer :: i,icount,j
+ icount = 0
+ do i = 1, List_all_comb_b3_size
+  if(dabs(List_all_comb_b3_coef(i)).gt.1.d-10)then
+   print*,''
+   print*,List_all_comb_b3_expo(i),List_all_comb_b3_coef(i)
+   print*,List_all_comb_b3_cent(1:3,i)
+   print*,''
+   icount += 1
+  endif
+  
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do icount = 1, List_comb_thr_b3_size(j,i)
+    print*,'',j,i
+    print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+    print*,''
+   enddo
+!   enddo
+  enddo
+ enddo
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b_test
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ integer :: ii , jj
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ double precision, allocatable :: ints(:,:,:)
+ allocate(ints(ao_num, ao_num, n_points_final_grid))
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
+!        stop
+!       endif
+!      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ double precision :: e_ref, e_new
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
+     enddo
+    enddo
+   enddo
+
+  enddo
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  thr_ih = 1d-7
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! ---
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_ao()
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & 
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) &
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end
+
+subroutine test_int_gauss
+ implicit none
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'(pi/alpha)**1.5 =',2.d0*pi / (alpha)**2
+
+
+end
+
+! ---
+
+subroutine test_tc_grad_and_lapl_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_and_lapl_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_and_lapl_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_square_ao tc_grad_square_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_square_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_square_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_two_e_tc_non_hermit_integral()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha
+  PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_alpha(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_alpha    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot a = ', diff_tot / norm
+  print *, '     norm a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_beta(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_beta    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot b = ', diff_tot / norm
+  print *, '     norm b = ', norm
+  print *, ' '
+
+  ! ---
+
+  return
+
+end
+
+! ---
+
diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f
index 87488fba..8daa2b8b 100644
--- a/src/tools/print_he_energy.irp.f
+++ b/src/tools/print_he_energy.irp.f
@@ -7,8 +7,8 @@ program print_he_energy
 
   call print_overlap()
 
-  call print_energy1()
-  call print_energy2()
+  !call print_energy1()
+  !call print_energy2()
 
 end