From 17d8197a674b0f9acd2db0f89d36b408f34ba466 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 6 Feb 2023 18:17:56 +0100
Subject: [PATCH 1/7] added ao_many_one_e_ints/ bi_ortho_mos/

---
 external/qp2-dependencies                     |    2 +-
 src/ao_many_one_e_ints/NEED                   |    5 +
 src/ao_many_one_e_ints/README.rst             |   25 +
 src/ao_many_one_e_ints/ao_erf_gauss.irp.f     | 1113 +++++++++++++++++
 .../ao_erf_gauss_grad.irp.f                   |  150 +++
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    |  426 +++++++
 src/ao_many_one_e_ints/fit_slat_gauss.irp.f   |   94 ++
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   |  517 ++++++++
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |  420 +++++++
 .../grad2_jmu_modif_vect.irp.f                |  453 +++++++
 .../grad_lapl_jmu_manu.irp.f                  |  369 ++++++
 .../grad_lapl_jmu_modif.irp.f                 |  300 +++++
 .../grad_related_ints.irp.f                   |  437 +++++++
 src/ao_many_one_e_ints/list_grid.irp.f        |   59 +
 src/ao_many_one_e_ints/listj1b.irp.f          |  237 ++++
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  191 +++
 .../prim_int_erf_gauss.irp.f                  |  195 +++
 .../prim_int_gauss_gauss.irp.f                |  340 +++++
 src/ao_many_one_e_ints/stg_gauss_int.irp.f    |  121 ++
 src/ao_many_one_e_ints/taylor_exp.irp.f       |  101 ++
 .../xyz_grad_xyz_ao_pol.irp.f                 |  343 +++++
 src/bi_ortho_mos/EZFIO.cfg                    |   11 +
 src/bi_ortho_mos/NEED                         |    3 +
 src/bi_ortho_mos/bi_density.irp.f             |   70 ++
 src/bi_ortho_mos/bi_ort_mos_in_r.irp.f        |  137 ++
 src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f   |  100 ++
 src/bi_ortho_mos/mos_rl.irp.f                 |  224 ++++
 src/bi_ortho_mos/overlap.irp.f                |  160 +++
 28 files changed, 6602 insertions(+), 1 deletion(-)
 create mode 100644 src/ao_many_one_e_ints/NEED
 create mode 100644 src/ao_many_one_e_ints/README.rst
 create mode 100644 src/ao_many_one_e_ints/ao_erf_gauss.irp.f
 create mode 100644 src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f
 create mode 100644 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
 create mode 100644 src/ao_many_one_e_ints/fit_slat_gauss.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_related_ints.irp.f
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b_sorted.irp.f
 create mode 100644 src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
 create mode 100644 src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
 create mode 100644 src/ao_many_one_e_ints/stg_gauss_int.irp.f
 create mode 100644 src/ao_many_one_e_ints/taylor_exp.irp.f
 create mode 100644 src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f
 create mode 100644 src/bi_ortho_mos/EZFIO.cfg
 create mode 100644 src/bi_ortho_mos/NEED
 create mode 100644 src/bi_ortho_mos/bi_density.irp.f
 create mode 100644 src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
 create mode 100644 src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f
 create mode 100644 src/bi_ortho_mos/mos_rl.irp.f
 create mode 100644 src/bi_ortho_mos/overlap.irp.f

diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index 242151e0..f40bde09 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit 242151e03d1d6bf042387226431d82d35845686a
+Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8
diff --git a/src/ao_many_one_e_ints/NEED b/src/ao_many_one_e_ints/NEED
new file mode 100644
index 00000000..0d08442c
--- /dev/null
+++ b/src/ao_many_one_e_ints/NEED
@@ -0,0 +1,5 @@
+ao_one_e_ints
+ao_two_e_ints
+becke_numerical_grid
+mo_one_e_ints
+dft_utils_in_r
diff --git a/src/ao_many_one_e_ints/README.rst b/src/ao_many_one_e_ints/README.rst
new file mode 100644
index 00000000..6d2c083f
--- /dev/null
+++ b/src/ao_many_one_e_ints/README.rst
@@ -0,0 +1,25 @@
+==================
+ao_many_one_e_ints
+==================
+
+This module contains A LOT of one-electron integrals of the type 
+A_ij( r ) = \int dr' phi_i(r') w(r,r') phi_j(r') 
+where r is a point in real space. 
+
++) ao_gaus_gauss.irp.f: w(r,r') is exp(-(r-r')^2), and can be multiplied by x/y/z
++) ao_erf_gauss.irp.f : w(r,r') is exp(-(r-r')^2) erf(mu * |r-r'|)/|r-r'|, and can be multiplied by x/y/z
++) ao_erf_gauss_grad.irp.f: w(r,r') is exp(-(r-r')^2) erf(mu * |r-r'|)/|r-r'|, and can be multiplied by x/y/z, but is also evaluated with one gradient of an AO function. 
+
+Fit of a Slater function and corresponding integrals
+----------------------------------------------------
+The file fit_slat_gauss.irp.f contains many useful providers/routines to fit a Slater function with 20 Gaussians. 
++) coef_fit_slat_gauss : coefficients of the gaussians to fit e^(-x)
++) expo_fit_slat_gauss : exponents of the gaussians to fit e^(-x)
+
+Integrals involving Slater functions : stg_gauss_int.irp.f
+
+Taylor expansion of full correlation factor
+-------------------------------------------
+In taylor_exp.irp.f you might find interesting integrals of the type 
+\int dr' exp( e^{-alpha |r-r'| - beta |r-r'|^2}) phi_i(r') phi_j(r') 
+evaluated as a Taylor expansion of the exponential. 
diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
new file mode 100644
index 00000000..3d7fbe50
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@@ -0,0 +1,1113 @@
+
+! ---
+
+subroutine phi_j_erf_mu_r_xyz_phi(i,j,mu_in, C_center, xyz_ints)
+ implicit none
+ BEGIN_DOC
+! xyz_ints(1/2/3) = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  x/y/z phi_i(r)
+!
+! where phi_i and phi_j are AOs, mu = mu_in and C = C_center
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: xyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,contrib_0,NAI_pol_mult_erf
+ integer :: n_pt_in,l,m,mm
+ xyz_ints = 0.d0
+ ! screening: xyz_ints stays at zero when ao_overlap_abs(j,i) is below 1.d-12
+ if(ao_overlap_abs(j,i).lt.1.d-12) return
+ n_pt_in = n_pt_max_integrals
+ ! j
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    ! (x phi_i ) * phi_j
+    ! x * (x - B_x)^b_x = B_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
+    !
+    ! the unshifted integral is the same for x, y and z: evaluate it once per primitive pair
+    contrib_0 = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in)
+    do mm = 1, 3
+     ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
+     xyz_ints(mm) += contrib_0 * B_center(mm) * ao_coef_normalized_ordered_transp(l,j)           &
+                                              * ao_coef_normalized_ordered_transp(m,i)
+     ! second contribution :: 1 * (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1
+     power_B_tmp = power_B
+     power_B_tmp(mm) += 1
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
+     xyz_ints(mm) += contrib * 1.d0        * ao_coef_normalized_ordered_transp(l,j)             &
+                                           * ao_coef_normalized_ordered_transp(m,i)
+    enddo
+  enddo
+ enddo
+end
+
+! ---
+
+double precision function phi_j_erf_mu_r_phi(i, j, mu_in, C_center)
+
+  BEGIN_DOC
+  ! phi_j_erf_mu_r_phi  = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  phi_i(r)
+  END_DOC
+  ! phi_i and phi_j are the AOs of indices i and j; mu = mu_in and C = C_center.
+  implicit none
+  integer,          intent(in) :: i,j
+  double precision, intent(in) :: mu_in, C_center(3)
+
+  integer          :: num_A, power_A(3), num_b, power_B(3)
+  integer          :: n_pt_in, l, m
+  double precision :: alpha, beta, A_center(3), B_center(3), contrib
+
+  double precision :: NAI_pol_mult_erf
+
+  phi_j_erf_mu_r_phi = 0.d0
+  ! screening: the result stays at zero when ao_overlap_abs(j,i) is below 1.d-12
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  n_pt_in = n_pt_max_integrals
+
+  ! j : nucleus, polynomial powers and centre of AO j (labelled A)
+  num_A         = ao_nucl(j)
+  power_A(1:3)  = ao_power(j,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+
+  ! i : nucleus, polynomial powers and centre of AO i (labelled B)
+  num_B         = ao_nucl(i)
+  power_B(1:3)  = ao_power(i,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+  ! contract the primitive integrals over all primitive pairs of the two AOs
+  do l = 1, ao_prim_num(j)
+   alpha = ao_expo_ordered_transp(l,j)
+   do m = 1, ao_prim_num(i)
+     beta = ao_expo_ordered_transp(m,i)
+
+     contrib = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+
+     phi_j_erf_mu_r_phi += contrib * ao_coef_normalized_ordered_transp(l,j) *  ao_coef_normalized_ordered_transp(m,i)
+   enddo
+  enddo
+
+end function phi_j_erf_mu_r_phi
+
+! ---
+
+subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
+ implicit none
+ BEGIN_DOC
+  ! gauss_ints(m) =   \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-r'|))/ |r-r'| * AO_i(r') * AO_j(r')
+  !
+  ! with m = 1 ==> x, m = 2 ==> y, m = 3 ==> z
+  !
+  !      m = 4 ==> no x/y/z
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu, C_center(3),delta
+ double precision, intent(out):: gauss_ints(4)
+ ! NOTE(review): the formula above mixes r and r'; confirm the exact integrand against erfc_mu_gauss_xyz
+ integer :: num_A,power_A(3), num_b, power_B(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf
+ double precision :: xyz_ints(4)
+ integer :: n_pt_in,l,m,mm
+ gauss_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ n_pt_in = n_pt_max_integrals
+ ! j : nucleus, polynomial powers and centre of AO j (labelled A)
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i : nucleus, polynomial powers and centre of AO i (labelled B)
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+ ! contract the four primitive integrals returned by erfc_mu_gauss_xyz with the AO coefficients
+ gauss_ints = 0.d0
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    call erfc_mu_gauss_xyz(C_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
+    do mm = 1, 4
+     gauss_ints(mm) += xyz_ints(mm)  * ao_coef_normalized_ordered_transp(l,j)             &
+                                     * ao_coef_normalized_ordered_transp(m,i)
+    enddo
+  enddo
+ enddo
+end
+
+! ---
+
+subroutine erf_mu_gauss_ij_ao(i, j, mu, C_center, delta, gauss_ints)
+
+  BEGIN_DOC
+  !
+  ! gauss_ints = \int dr exp(-delta (r - C)^2) * erf(mu |r-C|) / |r-C| * AO_i(r) * AO_j(r)
+  !
+  END_DOC
+  ! C = C_center; the result is a single scalar returned in gauss_ints.
+  implicit none
+  integer,          intent(in)  :: i, j
+  double precision, intent(in)  :: mu, C_center(3), delta
+  double precision, intent(out) :: gauss_ints
+
+  integer                       :: n_pt_in, l, m
+  integer                       :: num_A, power_A(3), num_b, power_B(3)
+  double precision              :: alpha, beta, A_center(3), B_center(3), coef
+  double precision              :: integral
+
+  double precision              :: erf_mu_gauss
+
+  gauss_ints = 0.d0
+  ! screening: gauss_ints stays at zero when ao_overlap_abs(j,i) is below 1.d-12
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  n_pt_in = n_pt_max_integrals
+
+  ! j : nucleus, polynomial powers and centre of AO j (labelled A)
+  num_A         = ao_nucl(j)
+  power_A(1:3)  = ao_power(j,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+
+  ! i : nucleus, polynomial powers and centre of AO i (labelled B)
+  num_B         = ao_nucl(i)
+  power_B(1:3)  = ao_power(i,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+  ! contract the primitive integrals over all primitive pairs of the two AOs
+  do l = 1, ao_prim_num(j)
+   alpha = ao_expo_ordered_transp(l,j)
+   do m = 1, ao_prim_num(i)
+     beta = ao_expo_ordered_transp(m,i)
+     coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i)
+     ! skip primitive pairs whose coefficient product is below 1.d-12 in absolute value
+     if(dabs(coef) .lt. 1.d-12) cycle
+
+     integral = erf_mu_gauss(C_center, delta, mu, A_center, B_center, power_A, power_B, alpha, beta, n_pt_in)
+
+     gauss_ints += integral * coef
+   enddo
+  enddo
+
+end subroutine erf_mu_gauss_ij_ao
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integrals :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! ints(1), ints(2), ints(3) receive the x, y and z integrals, in that order.
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+  double precision, intent(out) :: ints(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3), m
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  ints = 0.d0
+  ! note: no ao_overlap_abs screening is applied in this routine
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+      ! m selects the Cartesian direction (1, 2, 3) whose power on centre A is raised by one
+      power_xA = power_A
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+        ints(m) += integral * coef
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+        ints(m) += A_center(m) * integral * coef
+
+      enddo
+    enddo
+  enddo
+
+end subroutine NAI_pol_x_mult_erf_ao
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integrals, for each of the n_points centers R_C passed in C_center :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! ints(ipoint,m) holds direction m (1,2,3) for point ipoint; LD_C and LD_ints are leading dimensions.
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_C, LD_ints, n_points
+  double precision, intent(in)  :: mu_in, C_center(LD_C,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in
+  integer                       :: power_xA(3), m, ipoint
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+
+  ints(1:LD_ints,1:3) = 0.d0
+  ! presumably row ipoint of C_center holds one evaluation point - confirm against NAI_pol_mult_erf_v
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+  ! scratch buffer receiving the n_points values produced by each NAI_pol_mult_erf_v call
+  allocate(integral(n_points))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA = power_A
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v0
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integrals, direction m using its own set of n_points(m) centers R_C :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! Direction m reads the slab C_center(:,:,m) and writes ints(1:n_points(m),m).
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_C, LD_ints, n_points(3)
+  double precision, intent(in)  :: mu_in, C_center(LD_C,3,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, LD_integral
+  integer                       :: power_xA(3), m, ipoint, n_points_m
+  double precision              :: A_center(3), B_center(3), alpha, beta, coef
+  double precision, allocatable :: integral(:)
+
+  ints(1:LD_ints,1:3) = 0.d0
+  ! rows of ints beyond n_points(m) are never updated below and therefore remain zero
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+
+  n_pt_in = n_pt_max_integrals
+  ! a single scratch buffer, sized for the largest of the three point sets
+  LD_integral = max(max(n_points(1), n_points(2)), n_points(3))
+  allocate(integral(LD_integral))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+      n_points_m = n_points(m)
+
+      ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax
+      power_xA = power_A
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        beta = ao_expo_ordered_transp(j,j_ao)
+        coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+        ! First term = (x-Ax)**(ax+1)
+        call NAI_pol_mult_erf_v( A_center, B_center, power_xA, power_B, alpha, beta & 
+                               , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = Ax * (x-Ax)**(ax)
+        call NAI_pol_mult_erf_v( A_center, B_center, power_A, power_B, alpha, beta &
+                               , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += A_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_v
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_x = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+  ! past the screening: gather nucleus, powers and centre of both AOs
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+  ! x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax : power_xA carries the raised x power
+  power_xA = power_A
+  power_xA(1) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (x-Ax)**(ax+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_x += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_x += A_center(1) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_x
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_y = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+  ! past the screening: gather nucleus, powers and centre of both AOs
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+  ! y * (y-Ay)**ay = (y-Ay)**(ay+1) + Ay * (y-Ay)**ay : power_xA carries the raised y power
+  power_xA = power_A
+  power_xA(2) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (y-Ay)**(ay+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_y += integral * coef
+
+      ! Second term = Ay * (y-Ay)**(ay)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_y += A_center(2) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_y
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: mu_in, C_center(3)
+
+  integer                       :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3)
+  double precision              :: A_center(3), B_center(3), integral, alpha, beta, coef
+
+  double precision              :: NAI_pol_mult_erf
+
+  NAI_pol_x_mult_erf_ao_z = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return
+  ! past the screening: gather nucleus, powers and centre of both AOs
+  num_A         = ao_nucl(i_ao)
+  power_A(1:3)  = ao_power(i_ao,1:3)
+  A_center(1:3) = nucl_coord(num_A,1:3)
+  num_B         = ao_nucl(j_ao)
+  power_B(1:3)  = ao_power(j_ao,1:3)
+  B_center(1:3) = nucl_coord(num_B,1:3)
+  ! z * (z-Az)**az = (z-Az)**(az+1) + Az * (z-Az)**az : power_xA carries the raised z power
+  power_xA = power_A
+  power_xA(3) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alpha = ao_expo_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      beta = ao_expo_ordered_transp(j,j_ao)
+      coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao)
+
+      ! First term = (z-Az)**(az+1)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_z += integral * coef
+
+      ! Second term = Az * (z-Az)**(az)
+      integral =  NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
+      NAI_pol_x_mult_erf_ao_z += A_center(3) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_z
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_x(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_x
+  ! beta below 1d-10: delegate to the routine that has no extra Gaussian factor
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_x = NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_x = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+  ! x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax : power_xA carries the raised x power
+  power_xA     = power_Ai
+  power_xA(1) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (x-Ax)**(ax+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_x += integral * coef
+
+      ! Second term = Ax * (x-Ax)**(ax)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_x += Ai_center(1) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_x
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_y(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_y
+  ! beta below 1d-10: delegate to the routine that has no extra Gaussian factor
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_y = NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_y = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+  ! y * (y-Ay)**ay = (y-Ay)**(ay+1) + Ay * (y-Ay)**ay : power_xA carries the raised y power
+  power_xA     = power_Ai
+  power_xA(2) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (y-Ay)**(ay+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_y += integral * coef
+
+      ! Second term = Ay * (y-Ay)**(ay)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_y += Ai_center(2) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_y
+
+! ---
+
+double precision function NAI_pol_x_mult_erf_ao_with1s_z(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+  ! chi_i and chi_j are the AOs i_ao and j_ao; mu = mu_in and R_C = C_center.
+  include 'utils/constants.include.F'                                                                                                                                  
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3)
+  double precision              :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  double precision, external    :: NAI_pol_x_mult_erf_ao_z
+  ! beta below 1d-10: delegate to the routine that has no extra Gaussian factor
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_x_mult_erf_ao_with1s_z = NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  NAI_pol_x_mult_erf_ao_with1s_z = 0.d0
+  if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then
+    return
+  endif
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+  ! z * (z-Az)**az = (z-Az)**(az+1) + Az * (z-Az)**az : power_xA carries the raised z power
+  power_xA     = power_Ai
+  power_xA(3) += 1
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alphaj = ao_expo_ordered_transp                   (j,j_ao)
+      coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao) 
+
+      ! First term = (z-Az)**(az+1)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_z += integral * coef
+
+      ! Second term = Az * (z-Az)**(az)
+      integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+      NAI_pol_x_mult_erf_ao_with1s_z += Ai_center(3) * integral * coef
+
+    enddo
+  enddo
+
+end function NAI_pol_x_mult_erf_ao_with1s_z
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints)
+
+  BEGIN_DOC
+  !
+  ! Fills ints(m), m = 1, 2, 3, with the x, y and z weighted integrals :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao
+  double precision, intent(in)  :: beta, B_center(3), mu_in, C_center(3)
+  double precision, intent(out) :: ints(3)
+
+  integer                       :: i, j, m, pw_i(3), pw_j(3), pw_i_up(3), n_pt_in
+  double precision              :: ctr_i(3), ctr_j(3), prim_val, zeta_i, zeta_j, cf_i, cf_ij
+
+  double precision, external    :: NAI_pol_mult_erf_with1s
+  ! beta below 1d-10 : delegate to NAI_pol_x_mult_erf_ao, called without beta and B_center
+  ASSERT(beta .ge. 0.d0)
+  if(beta < 1d-10) then
+    call NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints)
+    return
+  endif
+
+  ints(1:3) = 0.d0
+
+  ctr_i(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  ctr_j(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  pw_i(1:3) = ao_power(i_ao,1:3)
+  pw_j(1:3) = ao_power(j_ao,1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  do i = 1, ao_prim_num(i_ao)
+    zeta_i = ao_expo_ordered_transp           (i,i_ao)
+    cf_i   = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! along direction m : X * (X-A)**a = (X-A)**(a+1) + A * (X-A)**a
+      pw_i_up    = pw_i
+      pw_i_up(m) = pw_i_up(m) + 1
+
+      do j = 1, ao_prim_num(j_ao)
+        zeta_j = ao_expo_ordered_transp                  (j,j_ao)
+        cf_ij  = cf_i * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! raised-power piece : (X-A)**(a+1)
+        prim_val = NAI_pol_mult_erf_with1s(ctr_i, ctr_j, pw_i_up, pw_j, zeta_i, zeta_j, beta, B_center, C_center, n_pt_in, mu_in)
+        ints(m) = ints(m) + prim_val * cf_ij
+
+        ! unshifted piece : A * (X-A)**a
+        prim_val = NAI_pol_mult_erf_with1s(ctr_i, ctr_j, pw_i, pw_j, zeta_i, zeta_j, beta, B_center, C_center, n_pt_in, mu_in)
+        ints(m) = ints(m) + ctr_i(m) * prim_val * cf_ij
+
+      enddo
+    enddo
+  enddo
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v0(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! For ipoint = 1, n_points, accumulates in ints(ipoint,m), m = 1, 2, 3 (x, y, z), the integrals :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  ! B_center / C_center presumably hold one point per row (leading dimensions LD_B / LD_C) -- TODO confirm in NAI_pol_mult_erf_with1s_v
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points
+  double precision, intent(in)  :: beta, mu_in
+  double precision, intent(in)  :: B_center(LD_B,3), C_center(LD_C,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision              :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                       :: ipoint
+  double precision, allocatable :: integral(:)
+  ! beta below 1d-10 : delegate to NAI_pol_x_mult_erf_ao_v0, called without beta and B_center
+  if(beta .lt. 1d-10) then
+    call NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+    return
+  endif
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+  ! work array receiving the n_points values produced by each call below
+  allocate(integral(n_points))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+
+      ! along direction m : X * phi_i(r) = X * (X-A)**a = (X-A)**(a+1) + A * (X-A)**a
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (X-A)**(a+1), i.e. the power of AO i raised by one along direction m
+
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+
+        do ipoint = 1, n_points
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = A * (X-A)**a, i.e. the original powers weighted by the m-th coordinate of the center of AO i
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points)
+        do ipoint = 1, n_points
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s_v0
+
+! ---
+
+subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+
+  BEGIN_DOC
+  !
+  ! For m = 1, 2, 3 (x, y, z) and ipoint = 1, n_points(m), accumulates in ints(ipoint,m) the integrals :
+  !
+  ! $\int_{-\infty}^{\infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! $\int_{-\infty}^{\infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  ! Component m uses the slices B_center(:,:,m) and C_center(:,:,m), each with its own point count n_points(m).
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points(3)
+  double precision, intent(in)  :: beta, mu_in
+  double precision, intent(in)  :: B_center(LD_B,3,3), C_center(LD_C,3,3)
+  double precision, intent(out) :: ints(LD_ints,3)
+
+  integer                       :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m
+  double precision              :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi
+
+  integer                       :: ipoint, n_points_m, LD_integral
+  double precision, allocatable :: integral(:)
+  ! beta below 1d-10 : print the AO pair and delegate to NAI_pol_x_mult_erf_ao_v, called without beta and B_center
+  if(beta .lt. 1d-10) then
+    print *, 'small beta', i_ao, j_ao
+    call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points)
+    return
+  endif
+
+  ints(1:LD_ints,1:3) = 0.d0
+
+  power_Ai(1:3) = ao_power(i_ao,1:3)
+  power_Aj(1:3) = ao_power(j_ao,1:3)
+
+  Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+  ! a single work array, sized for the largest of the three point counts, is shared by all components
+  LD_integral = max(max(n_points(1), n_points(2)), n_points(3))
+  allocate(integral(LD_integral))
+  integral = 0.d0
+
+  do i = 1, ao_prim_num(i_ao)
+    alphai = ao_expo_ordered_transp           (i,i_ao)
+    coefi  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do m = 1, 3
+      n_points_m = n_points(m)
+
+      ! along direction m : X * phi_i(r) = X * (X-A)**a = (X-A)**(a+1) + A * (X-A)**a
+      power_xA     = power_Ai
+      power_xA(m) += 1
+
+      do j = 1, ao_prim_num(j_ao)
+        alphaj = ao_expo_ordered_transp                   (j,j_ao)
+        coef   = coefi * ao_coef_normalized_ordered_transp(j,j_ao)
+
+        ! First term = (X-A)**(a+1), i.e. the power of AO i raised by one along direction m
+
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += integral(ipoint) * coef
+        enddo
+
+        ! Second term = A * (X-A)**a, i.e. the original powers weighted by the m-th coordinate of the center of AO i
+        call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta &
+                                      , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m)
+        do ipoint = 1, n_points_m
+          ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef
+        enddo
+
+      enddo
+    enddo
+  enddo
+
+  deallocate(integral)
+
+end subroutine NAI_pol_x_mult_erf_ao_with1s_v
+
+! ---
+
+subroutine NAI_pol_x_specify_mult_erf_ao(i_ao,j_ao,mu_in,C_center,m,ints)
+ implicit none
+  BEGIN_DOC
+  ! Returns in ints the integral of a single Cartesian component X(m) :
+  ! $\int_{-\infty}^{\infty} dr X(m) * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !
+  ! with X(1) = x, X(2) = y, X(3) = z
+  END_DOC
+ include 'utils/constants.include.F'
+ integer, intent(in)            :: i_ao, j_ao, m
+ double precision, intent(in)   :: mu_in, C_center(3)
+ double precision, intent(out)  :: ints
+ double precision               :: ctr_a(3), ctr_b(3), prim_val, zeta_a, zeta_b
+ double precision, external     :: NAI_pol_mult_erf
+ integer                        :: i, j, pw_a(3), pw_b(3), pw_a_up(3), n_pt_in
+ ints = 0.d0
+ ! AO pair with negligible absolute overlap : ints stays zero
+ if(ao_overlap_abs(j_ao,i_ao) < 1.d-12) return
+
+ pw_a(1:3)  = ao_power(i_ao,1:3)
+ ctr_a(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+ pw_b(1:3)  = ao_power(j_ao,1:3)
+ ctr_b(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+ n_pt_in = n_pt_max_integrals
+
+ ! along direction m : X * phi_i(r) = X * (X-A)**a = (X-A)**(a+1) + A * (X-A)**a
+ pw_a_up    = pw_a
+ pw_a_up(m) = pw_a_up(m) + 1
+
+ do i = 1, ao_prim_num(i_ao)
+  zeta_a = ao_expo_ordered_transp(i,i_ao)
+  do j = 1, ao_prim_num(j_ao)
+   zeta_b = ao_expo_ordered_transp(j,j_ao)
+   ! raised-power piece : (X-A)**(a+1)
+   prim_val = NAI_pol_mult_erf(ctr_a,ctr_b,pw_a_up,pw_b,zeta_a,zeta_b,C_center,n_pt_in,mu_in)
+   ints = ints + prim_val * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao)
+   ! unshifted piece : A * (X-A)**a
+   prim_val = NAI_pol_mult_erf(ctr_a,ctr_b,pw_a,pw_b,zeta_a,zeta_b,C_center,n_pt_in,mu_in)
+   ints = ints + ctr_a(m) * prim_val * ao_coef_normalized_ordered_transp(j,j_ao)*ao_coef_normalized_ordered_transp(i,i_ao)
+  enddo
+ enddo
+
+end
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f
new file mode 100644
index 00000000..8a32c38a
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_erf_gauss_grad.irp.f
@@ -0,0 +1,150 @@
+subroutine phi_j_erf_mu_r_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+ END_DOC
+ integer, intent(in) :: i,j
+ double precision, intent(in) :: mu_in, C_center(3)
+ double precision, intent(out):: dxyz_ints(3)
+ integer :: num_A,power_A(3), num_b, power_B(3),power_B_tmp(3)
+ double precision :: alpha, beta, A_center(3), B_center(3),contrib,NAI_pol_mult_erf,coef,thr
+ integer :: n_pt_in,l,m,mm
+ thr = 1.d-12
+ dxyz_ints = 0.d0
+ if(ao_overlap_abs(j,i).lt.thr)then
+  return
+ endif
+
+ n_pt_in = n_pt_max_integrals
+ ! j == A : the AO that enters undifferentiated
+ num_A = ao_nucl(j)
+ power_A(1:3)= ao_power(j,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ ! i == B : the AO that is differentiated
+ num_B = ao_nucl(i)
+ power_B(1:3)= ao_power(i,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+
+ do l=1,ao_prim_num(j)
+  alpha = ao_expo_ordered_transp(l,j)
+  do m=1,ao_prim_num(i)
+    beta = ao_expo_ordered_transp(m,i)
+    coef = ao_coef_normalized_ordered_transp(l,j) * ao_coef_normalized_ordered_transp(m,i)
+    if(dabs(coef).lt.thr)cycle
+    do mm = 1, 3
+     ! (d/dx phi_i ) * phi_j, with x the mm-th coordinate, b = power_B(mm) and B = B_center(mm) :
+     ! d/dx (x - B)^b exp(-beta (x - B)^2) = [b (x - B)^(b-1) - 2 beta (x - B)^(b+1)] exp(-beta (x - B)^2)
+     ! first contribution :: b (x - B)^(b-1); identically zero for b = 0, so no integral with power -1 is requested
+     if(power_B(mm) .gt. 0) then
+      power_B_tmp = power_B
+      power_B_tmp(mm) += -1
+      contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
+      dxyz_ints(mm) += contrib * dble(power_B(mm)) * coef
+     endif
+     ! second contribution ::  - 2 beta * (x - B_x)^(b_x + 1) :: integral with b_x=> b_x+1 multiplied by -2 * beta
+     power_B_tmp = power_B
+     power_B_tmp(mm) += 1
+     contrib = NAI_pol_mult_erf(A_center,B_center,power_A,power_B_tmp,alpha,beta,C_center,n_pt_in,mu_in)
+     dxyz_ints(mm) += contrib * (-2.d0 * beta )  * coef
+
+    enddo
+  enddo
+ enddo
+end
+
+
+
+
+subroutine phi_j_erf_mu_r_dxyz_phi_bis(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_j(r) [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+!
+! The derivative of AO i is expanded with power_ord_grad_transp / ao_coef_ord_grad_transp.
+ END_DOC
+ integer, intent(in)           :: i, j
+ double precision, intent(in)  :: mu_in, C_center(3)
+ double precision, intent(out) :: dxyz_ints(3)
+ integer                       :: pw_j(3), pw_i(3), pw_di(3)
+ integer                       :: n_pt_in, l, m, mm, kk
+ double precision              :: zeta_j, zeta_i, ctr_j(3), ctr_i(3), prim_val, cf_ji
+ double precision, parameter   :: thr = 1.d-12
+ double precision, external    :: NAI_pol_mult_erf
+
+ dxyz_ints = 0.d0
+ ! AO pair with negligible absolute overlap : dxyz_ints stays zero
+ if(ao_overlap_abs(j,i) < thr) return
+
+ n_pt_in = n_pt_max_integrals
+ ! AO j : enters as is
+ pw_j(1:3)  = ao_power(j,1:3)
+ ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+ ! AO i : the differentiated one
+ pw_i(1:3)  = ao_power(i,1:3)
+ ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+
+ do l = 1, ao_prim_num(j)
+  zeta_j = ao_expo_ordered_transp(l,j)
+  do m = 1, ao_prim_num(i)
+   zeta_i = ao_expo_ordered_transp(m,i)
+   do kk = 1, 2 ! the two terms generated by d/dx/y/z acting on AO(i)
+    do mm = 1, 3
+     pw_di     = pw_i
+     pw_di(mm) = power_ord_grad_transp(kk,mm,i)
+     cf_ji = ao_coef_normalized_ordered_transp(l,j) * ao_coef_ord_grad_transp(kk,mm,m,i)
+     if(abs(cf_ji) < thr) cycle
+     prim_val = NAI_pol_mult_erf(ctr_j,ctr_i,pw_j,pw_di,zeta_j,zeta_i,C_center,n_pt_in,mu_in)
+     dxyz_ints(mm) = dxyz_ints(mm) + prim_val * cf_ji
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
+
+subroutine phi_j_erf_mu_r_xyz_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+ implicit none
+ BEGIN_DOC
+! dxyz_ints(1/2/3) = int dr phi_j(r) x/y/z [erf(mu  |r - C|)/|r-C|]  d/d(x/y/z) phi_i(r)
+!
+! x/y/z d/d(x/y/z) of AO i is expanded with power_ord_xyz_grad_transp / ao_coef_ord_xyz_grad_transp.
+ END_DOC
+ integer, intent(in)           :: i, j
+ double precision, intent(in)  :: mu_in, C_center(3)
+ double precision, intent(out) :: dxyz_ints(3)
+ integer                       :: pw_j(3), pw_i(3), pw_xdi(3)
+ integer                       :: n_pt_in, l, m, mm, kk
+ double precision              :: zeta_j, zeta_i, ctr_j(3), ctr_i(3), prim_val, cf_ji
+ double precision, parameter   :: thr = 1.d-12
+ double precision, external    :: NAI_pol_mult_erf
+
+ dxyz_ints = 0.d0
+ ! AO pair with negligible absolute overlap : dxyz_ints stays zero
+ if(ao_overlap_abs(j,i) < thr) return
+
+ n_pt_in = n_pt_max_integrals
+ ! AO j : enters as is
+ pw_j(1:3)  = ao_power(j,1:3)
+ ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+ ! AO i : the one acted on by x/y/z d/d(x/y/z)
+ pw_i(1:3)  = ao_power(i,1:3)
+ ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+
+ do l = 1, ao_prim_num(j)
+  zeta_j = ao_expo_ordered_transp(l,j)
+  do m = 1, ao_prim_num(i)
+   zeta_i = ao_expo_ordered_transp(m,i)
+   do kk = 1, 4 ! the four terms generated by x/y/z * d/dx/y/z acting on AO(i)
+    do mm = 1, 3
+     pw_xdi     = pw_i
+     pw_xdi(mm) = power_ord_xyz_grad_transp(kk,mm,i)
+     cf_ji = ao_coef_normalized_ordered_transp(l,j) * ao_coef_ord_xyz_grad_transp(kk,mm,m,i)
+     if(abs(cf_ji) < thr) cycle
+     prim_val = NAI_pol_mult_erf(ctr_j,ctr_i,pw_j,pw_xdi,zeta_j,zeta_i,C_center,n_pt_in,mu_in)
+     dxyz_ints(mm) = dxyz_ints(mm) + prim_val * cf_ji
+    enddo
+   enddo
+  enddo
+ enddo
+
+end
diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
new file mode 100644
index 00000000..d2115d9e
--- /dev/null
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -0,0 +1,426 @@
+! ---
+
+subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints)
+
+ implicit none
+ BEGIN_DOC
+! gauss_ints(m) = \int dr AO_i(r) AO_j(r) X(m) e^{-delta |r-D_center|^2}
+!
+! with X(1) = x, X(2) = y, X(3) = z
+ END_DOC
+ integer, intent(in)           :: i, j
+ double precision, intent(in)  :: D_center(3), delta
+ double precision, intent(out) :: gauss_ints(3)
+
+ integer          :: pw_i(3), pw_j(3), l, k
+ double precision :: ctr_i(3), ctr_j(3), zeta_i, zeta_j, prim_xyz(3)
+ gauss_ints = 0.d0
+ ! AO pair with negligible absolute overlap : gauss_ints stays zero
+ if(ao_overlap_abs(j,i) < 1.d-12) return
+
+ pw_i(1:3)  = ao_power(i,1:3)
+ ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+ pw_j(1:3)  = ao_power(j,1:3)
+ ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+ do l = 1, ao_prim_num(i)
+  zeta_i = ao_expo_ordered_transp(l,i)
+  do k = 1, ao_prim_num(j)
+   zeta_j = ao_expo_ordered_transp(k,j)
+   ! the three components of the primitive pair (l,k) come back in prim_xyz
+   call overlap_gauss_xyz_r12(D_center,delta,ctr_i,ctr_j,pw_i,pw_j,zeta_i,zeta_j,prim_xyz)
+   ! contract with the two primitive coefficients
+   gauss_ints(1:3) = gauss_ints(1:3) + prim_xyz(1:3) * ao_coef_normalized_ordered_transp(l,i) &
+                                                     * ao_coef_normalized_ordered_transp(k,j)
+  enddo
+ enddo
+
+
+end
+
+
+
+double precision function overlap_gauss_xyz_r12_ao_specific(D_center,delta,i,j,mx)
+ implicit none
+ BEGIN_DOC
+! \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2}
+! The contributions of all primitive pairs (l,k) of AO i and AO j are accumulated.
+! with mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
+ END_DOC
+ integer, intent(in) :: i,j,mx
+ double precision, intent(in)  :: D_center(3), delta
+
+ integer :: num_a,num_b,power_A(3), power_B(3),l,k
+ double precision :: gauss_int
+ double precision :: A_center(3), B_center(3),alpha,beta
+ double precision :: overlap_gauss_xyz_r12_specific
+ overlap_gauss_xyz_r12_ao_specific = 0.d0
+ if(ao_overlap_abs(j,i).lt.1.d-12)then
+  return
+ endif
+ num_A = ao_nucl(i)
+ power_A(1:3)= ao_power(i,1:3)
+ A_center(1:3) = nucl_coord(num_A,1:3)
+ num_B = ao_nucl(j)
+ power_B(1:3)= ao_power(j,1:3)
+ B_center(1:3) = nucl_coord(num_B,1:3)
+ do l=1,ao_prim_num(i)
+  alpha = ao_expo_ordered_transp(l,i)
+  do k=1,ao_prim_num(j)
+   beta = ao_expo_ordered_transp(k,j)
+   gauss_int = overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
+   overlap_gauss_xyz_r12_ao_specific += gauss_int *  ao_coef_normalized_ordered_transp(l,i)             &
+                                                  *  ao_coef_normalized_ordered_transp(k,j)
+  enddo
+ enddo
+end
+
+
+subroutine overlap_gauss_r12_all_ao(D_center,delta,aos_ints)
+ implicit none
+ BEGIN_DOC
+! aos_ints(j,i) = \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} for every AO pair; pairs with ao_overlap_abs < 1.d-12 stay zero
+ END_DOC
+ double precision, intent(in)  :: D_center(3), delta
+ double precision, intent(out) :: aos_ints(ao_num,ao_num)
+ integer                       :: pw_i(3), pw_j(3), l, k, i, j
+ double precision              :: ctr_i(3), ctr_j(3), zeta_i, zeta_j, prim_val
+ double precision, external    :: overlap_gauss_r12
+ aos_ints = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(ao_overlap_abs(j,i) < 1.d-12) cycle
+   pw_i(1:3)  = ao_power(i,1:3)
+   ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+   pw_j(1:3)  = ao_power(j,1:3)
+   ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+   do l = 1, ao_prim_num(i)
+    zeta_i = ao_expo_ordered_transp(l,i)
+    do k = 1, ao_prim_num(j)
+     zeta_j = ao_expo_ordered_transp(k,j)
+     prim_val = overlap_gauss_r12(D_center,delta,ctr_i,ctr_j,pw_i,pw_j,zeta_i,zeta_j)
+     aos_ints(j,i) = aos_ints(j,i) + prim_val * ao_coef_normalized_ordered_transp(l,i) * ao_coef_normalized_ordered_transp(k,j)
+    enddo
+   enddo
+  enddo
+ enddo
+end
+
+! ---
+
+! TODO :: PUT CYCLES IN LOOPS
+double precision function overlap_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}, contracted over the primitives of both AOs
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: pw_i(3), pw_j(3), l, k
+  double precision             :: ctr_i(3), ctr_j(3), zeta_i, zeta_j, cf_i, cf_ij, prim_val
+
+  double precision, external   :: overlap_gauss_r12
+
+  overlap_gauss_r12_ao = 0.d0
+
+  ! AO pair with negligible absolute overlap : the result stays zero
+  if(ao_overlap_abs(j,i) < 1.d-12) return
+
+
+  ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+  ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  pw_i(1:3) = ao_power(i,1:3)
+  pw_j(1:3) = ao_power(j,1:3)
+
+  do l = 1, ao_prim_num(i)
+    zeta_i = ao_expo_ordered_transp           (l,i)
+    cf_i   = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      zeta_j = ao_expo_ordered_transp(k,j)
+      cf_ij  = cf_i * ao_coef_normalized_ordered_transp(k,j)
+      ! primitive pairs with a negligible coefficient product are skipped
+      if(abs(cf_ij) < 1d-12) cycle
+
+      prim_val = overlap_gauss_r12(D_center, delta, ctr_i, ctr_j, pw_i, pw_j, zeta_i, zeta_j)
+      overlap_gauss_r12_ao = overlap_gauss_r12_ao + cf_ij * prim_val
+
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao
+
+! --
+
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! \sum_{l,k} | c_l c_k overlap_abs_gauss_r12(l,k) | over the primitives l of AO_i and k of AO_j; presumably a bound on | \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} | (to be confirmed)
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+      ! every primitive pair contributes through its absolute value, so terms cannot cancel
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_abs_gauss_r12_ao
+
+! --
+
+subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! resv(1:n_points) : \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}, one value per center passed in D_center
+  !
+  ! n_points: nb of integrals <= min(LD_D, LD_resv)
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, LD_D, LD_resv, n_points
+  double precision, intent(in)  :: D_center(LD_D,3), delta
+  double precision, intent(out) :: resv(LD_resv)
+
+  integer                       :: l, k
+  integer                       :: pw_i(3), pw_j(3)
+  double precision              :: ctr_i(3), ctr_j(3), zeta_i, zeta_j, cf_i, cf_ij
+  double precision, allocatable :: prim_vals(:)
+
+  resv(:) = 0.d0
+  ! AO pair with negligible absolute overlap : resv stays zero
+  if(ao_overlap_abs(j,i) < 1.d-12) return
+
+
+  ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+  ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  pw_i(1:3) = ao_power(i,1:3)
+  pw_j(1:3) = ao_power(j,1:3)
+
+  allocate(prim_vals(n_points))
+
+  do l = 1, ao_prim_num(i)
+    zeta_i = ao_expo_ordered_transp           (l,i)
+    cf_i   = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      zeta_j = ao_expo_ordered_transp(k,j)
+      cf_ij  = cf_i * ao_coef_normalized_ordered_transp(k,j)
+
+      if(abs(cf_ij) < 1d-12) cycle
+
+      call overlap_gauss_r12_v(D_center, LD_D, delta, ctr_i, ctr_j, pw_i, pw_j, zeta_i, zeta_j, prim_vals, n_points, n_points)
+
+      ! add the weighted primitive values of all points at once
+      resv(1:n_points) = resv(1:n_points) + cf_ij * prim_vals(1:n_points)
+
+
+    enddo
+  enddo
+
+  deallocate(prim_vals)
+
+end subroutine overlap_gauss_r12_ao_v
+
+! ---
+
+double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center, delta, i, j)
+
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center|^2} e^{-delta |r-D_center|^2}
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: B_center(3), beta, D_center(3), delta
+
+  integer                      :: pw_i(3), pw_j(3), l, k
+  double precision             :: ctr_i(3), ctr_j(3), zeta_i, zeta_j, cf_i, cf_ij, prim_val
+  double precision             :: ctr_g(3), zeta_g, pref_g, zeta_g_inv
+
+  double precision, external   :: overlap_gauss_r12, overlap_gauss_r12_ao
+  ! beta below 1d-10 : delegate to overlap_gauss_r12_ao, called without beta and B_center
+  if(beta < 1d-10) then
+    overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j)
+    return
+  endif
+
+  overlap_gauss_r12_ao_with1s = 0.d0
+
+  ! AO pair with negligible absolute overlap : the result stays zero
+  if(ao_overlap_abs(j,i) < 1.d-12) return
+
+
+  ! Gaussian product : e^{-beta |r-B_center|^2} e^{-delta |r-D_center|^2} = pref_g e^{-zeta_g |r - G|^2}
+
+  zeta_g     = beta + delta
+  zeta_g_inv = 1.d0 / zeta_g
+  ! G is the exponent-weighted average of B_center and D_center
+  ctr_g(1:3) = (beta * B_center(1:3) + delta * D_center(1:3)) * zeta_g_inv
+  pref_g     = beta * delta * zeta_g_inv * ( (B_center(1) - D_center(1)) * (B_center(1) - D_center(1)) &
+                                           + (B_center(2) - D_center(2)) * (B_center(2) - D_center(2)) &
+                                           + (B_center(3) - D_center(3)) * (B_center(3) - D_center(3)) )
+  ! exponent above 10 : the prefactor is neglected and the function returns zero
+  if(pref_g > 10d0) return
+  pref_g = dexp(-pref_g)
+
+  ! ---
+
+  pw_i(1:3) = ao_power(i,1:3)
+  pw_j(1:3) = ao_power(j,1:3)
+
+  ctr_i(1:3) = nucl_coord(ao_nucl(i),1:3)
+  ctr_j(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    zeta_i = ao_expo_ordered_transp                    (l,i)
+    cf_i   = pref_g * ao_coef_normalized_ordered_transp(l,i)
+    if(abs(cf_i) < 1d-12) cycle
+
+    do k = 1, ao_prim_num(j)
+      zeta_j = ao_expo_ordered_transp                  (k,j)
+      cf_ij  = cf_i * ao_coef_normalized_ordered_transp(k,j)
+      if(abs(cf_ij) < 1d-12) cycle
+
+      prim_val = overlap_gauss_r12(ctr_g, zeta_g, ctr_i, ctr_j, pw_i, pw_j, zeta_i, zeta_j)
+      overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao_with1s + cf_ij * prim_val
+
+    enddo
+  enddo
+
+end function overlap_gauss_r12_ao_with1s
+
+! ---
+
+subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center|^2} e^{-delta |r-D_center|^2}
+  ! using an array of D_centers : resv(ipoint) is the value for the center D_center(ipoint,1:3), ipoint = 1, n_points.
+  ! resv is always defined on exit; it is zero when ao_overlap_abs(j,i) < 1.d-12.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, j, n_points, LD_D, LD_resv
+  double precision, intent(in)  :: B_center(3), beta, D_center(LD_D,3), delta
+  double precision, intent(out) :: resv(LD_resv)
+
+  integer                       :: ipoint
+  integer                       :: power_A1(3), power_A2(3), l, k
+  double precision              :: A1_center(3), A2_center(3), alpha1, alpha2, coef1
+  double precision              :: coef12, coef12f
+  double precision              :: gama, gama_inv
+  double precision              :: bg, dg, bdg
+  double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:)
+
+  ! resv is intent(out) : it is zeroed before any early return so that callers never read undefined values
+  resv(:) = 0.d0
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) return
+
+  ASSERT(beta .ge. 0.d0)
+
+  if(beta .lt. 1d-10) then
+    call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
+    return
+  endif
+
+  ! from here on beta >= 1d-10, so gama = beta + delta is nonzero provided delta >= 0
+
+  ! e^{-beta |r-B_center|^2} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2}
+
+  gama     = beta + delta
+  gama_inv = 1.d0 / gama
+
+  power_A1(1:3) = ao_power(i,1:3)
+  power_A2(1:3) = ao_power(j,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
+
+  bg  = beta  * gama_inv
+  dg  = delta * gama_inv
+  bdg = bg * delta
+
+  do ipoint = 1, n_points
+    ! center G and prefactor exponent of the Gaussian product for this point
+    G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
+    G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
+    G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+    ! exponents of 10 or more are neglected : the prefactor is set to zero
+    if(fact_g(ipoint) < 10d0) then
+      fact_g(ipoint) = dexp(-fact_g(ipoint))
+    else
+      fact_g(ipoint) = 0.d0
+    endif
+
+  enddo
+
+  do l = 1, ao_prim_num(i)
+    alpha1 = ao_expo_ordered_transp           (l,i)
+    coef1  = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      alpha2 = ao_expo_ordered_transp                   (k,j)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      if(dabs(coef12) .lt. 1d-12) cycle
+
+      call overlap_gauss_r12_v(G_center, n_points, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points, n_points)
+
+      do ipoint = 1, n_points
+        coef12f = coef12 * fact_g(ipoint)
+        resv(ipoint) += coef12f * analytical_j(ipoint)
+      enddo
+    enddo
+  enddo
+
+  deallocate(fact_g, G_center, analytical_j)
+
+end subroutine overlap_gauss_r12_ao_with1s_v
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/fit_slat_gauss.irp.f b/src/ao_many_one_e_ints/fit_slat_gauss.irp.f
new file mode 100644
index 00000000..052ad072
--- /dev/null
+++ b/src/ao_many_one_e_ints/fit_slat_gauss.irp.f
@@ -0,0 +1,94 @@
+ BEGIN_PROVIDER [ integer, n_max_fit_slat ]
+   BEGIN_DOC
+   ! Number of Gaussian functions used to fit the Slater function exp(-x).
+   !
+   ! The 20-term expansion comes from the program bassto.f
+   END_DOC
+   implicit none
+   n_max_fit_slat = 20
+ END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, coef_fit_slat_gauss, (n_max_fit_slat)]
+&BEGIN_PROVIDER [double precision, expo_fit_slat_gauss, (n_max_fit_slat)]
+ implicit none
+  include 'constants.include.F'
+ BEGIN_DOC
+ ! Fit of exp(-x) as
+ !
+ ! \sum_{i = 1, n_max_fit_slat} coef_fit_slat_gauss(i) * exp(-expo_fit_slat_gauss(i) * x**2)
+ !
+ ! The coefficients are taken from the program bassto.f
+ END_DOC
+ ! The constants carry the d0 suffix so that they are double precision literals:
+ ! without it they are default-kind (single precision) reals and lose digits on assignment.
+      expo_fit_slat_gauss(01)=30573.77073000000d0
+      coef_fit_slat_gauss(01)=0.00338925525d0
+      expo_fit_slat_gauss(02)=5608.45238100000d0
+      coef_fit_slat_gauss(02)=0.00536433869d0
+      expo_fit_slat_gauss(03)=1570.95673400000d0
+      coef_fit_slat_gauss(03)=0.00818702846d0
+      expo_fit_slat_gauss(04)=541.39785110000d0
+      coef_fit_slat_gauss(04)=0.01202047655d0
+      expo_fit_slat_gauss(05)=212.43469630000d0
+      coef_fit_slat_gauss(05)=0.01711289568d0
+      expo_fit_slat_gauss(06)=91.31444574000d0
+      coef_fit_slat_gauss(06)=0.02376001022d0
+      expo_fit_slat_gauss(07)=42.04087246000d0
+      coef_fit_slat_gauss(07)=0.03229121736d0
+      expo_fit_slat_gauss(08)=20.43200443000d0
+      coef_fit_slat_gauss(08)=0.04303646818d0
+      expo_fit_slat_gauss(09)=10.37775161000d0
+      coef_fit_slat_gauss(09)=0.05624657578d0
+      expo_fit_slat_gauss(10)=5.46880754500d0
+      coef_fit_slat_gauss(10)=0.07192311571d0
+      expo_fit_slat_gauss(11)=2.97373529200d0
+      coef_fit_slat_gauss(11)=0.08949389001d0
+      expo_fit_slat_gauss(12)=1.66144190200d0
+      coef_fit_slat_gauss(12)=0.10727599240d0
+      expo_fit_slat_gauss(13)=0.95052560820d0
+      coef_fit_slat_gauss(13)=0.12178961750d0
+      expo_fit_slat_gauss(14)=0.55528683970d0
+      coef_fit_slat_gauss(14)=0.12740141870d0
+      expo_fit_slat_gauss(15)=0.33043360020d0
+      coef_fit_slat_gauss(15)=0.11759168160d0
+      expo_fit_slat_gauss(16)=0.19982303230d0
+      coef_fit_slat_gauss(16)=0.08953504394d0
+      expo_fit_slat_gauss(17)=0.12246840760d0
+      coef_fit_slat_gauss(17)=0.05066721317d0
+      expo_fit_slat_gauss(18)=0.07575825322d0
+      coef_fit_slat_gauss(18)=0.01806363869d0
+      expo_fit_slat_gauss(19)=0.04690146243d0
+      coef_fit_slat_gauss(19)=0.00305632563d0
+      expo_fit_slat_gauss(20)=0.02834749861d0
+      coef_fit_slat_gauss(20)=0.00013317513d0
+
+ ! The table above fills entries 01 to 20 of both arrays.
+
+END_PROVIDER 
+
+double precision function slater_fit_gam(x,gam)
+  BEGIN_DOC
+  ! Evaluates at x the Gaussian expansion fitted to exp(-gam * x).
+  END_DOC
+  implicit none
+  double precision, intent(in) :: x, gam
+  integer                      :: k
+  slater_fit_gam = 0.d0
+  do k = 1, n_max_fit_slat
+    slater_fit_gam = slater_fit_gam + coef_fit_slat_gauss(k) * dexp(-expo_fit_slat_gauss(k) * gam * gam * x * x)
+  enddo
+end function slater_fit_gam
+
+subroutine expo_fit_slater_gam(gam,expos)
+  BEGIN_DOC
+  ! Fills expos with the exponents of the Gaussians fitting exp(-gam*x),
+  ! i.e. the entries of expo_fit_slat_gauss multiplied by gam * gam.
+  END_DOC
+  implicit none
+  double precision, intent(in)  :: gam
+  double precision, intent(out) :: expos(n_max_fit_slat)
+
+  expos(1:n_max_fit_slat) = expo_fit_slat_gauss(1:n_max_fit_slat) * gam * gam
+
+end subroutine expo_fit_slater_gam
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..f01ed5ba
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,517 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! For every grid point r1 (third index) and AO pair (j,i), screened evaluation of
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  ! summed over the ng_fit_jast fit Gaussians and the List_comb_thr_b3_* terms of the pair.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+  double precision              :: int_gauss, dsqpi_3_2, int_j1b
+  double precision              :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  ! NOTE(review): tmp, dsqpi_3_2 and int_fit_v are never used by the active code below.
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...'
+  ! pi^(3/2): prefactor of the screening estimate used inside the i_fit loop
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                                                              &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,                                     &
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,                   &
+     !$OMP          final_grid_points_transp, ng_fit_jast,                                                  &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,                                             &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,                                           &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,              &
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+ do ipoint = 1, n_points_final_grid
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+   do i = 1, ao_num
+     do j = i, ao_num
+       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+         cycle
+       endif
+       ! Pairs skipped above keep the 0.d0 assigned before the parallel region.
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+         ! Loop over the ng_fit_jast fit Gaussians (expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2).
+         do i_fit = 1, ng_fit_jast
+  
+           expo_fit = expo_gauss_1_erf_x_2(i_fit)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+           ! The test above drops terms whose estimate (factor_ij_1s, int_j1b, beta_ij) is below 1.d-10.
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+           int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+  
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+  
+         enddo
+        enddo
+       enddo
+     enddo
+   enddo
+
+   !$OMP END DO
+   !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i.
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! Vectorized (all grid points at once) evaluation, for each AO pair (j,i), of
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  ! through overlap_gauss_r12_ao_with1s_v; the grid point is the third index of the result.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp, int_j1b
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+
+ ! Allocate the work array BEFORE zeroing it: assigning to a section of an unallocated array is invalid.
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
+ big_array(:,:,:) = 0.d0
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, big_array,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+ ! Each thread owns a private buffer that receives the integrals on all grid points.
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+       do i_fit = 1, ng_fit_jast
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+ deallocate(int_fit_v)
+ !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+  ! Fill the j < i entries from the computed j >= i triangle of the work array.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+  deallocate(big_array)
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! For every grid point r1 (third index) and AO pair (j,i), screened evaluation of
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  ! summed over the ng_fit_jast fit Gaussians and the List_comb_thr_b3_* terms of the pair.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+  ! NOTE(review): overlap_gauss_r12_ao is declared external but never called in this provider.
+  print*, ' providing int2_u2_j1b2_test ...'
+  ! pi^(3/2): prefactor of the screening estimate used inside the i_fit loop
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        ! tmp accumulates every retained (i_1s, i_fit) contribution of the pair at this grid point.
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+          
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+          
+            ! Terms that pass the 1.d-10 screening test above are integrated explicitly.
+          
+              int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          
+              tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! For every grid point r1 and AO pair (j,i), the three components (last index) of
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  ! summed, with screening, over the ng_fit_jast fit Gaussians and the List_comb_thr_b3_* terms.
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+
+  print*, ' providing int2_u_grad1u_x_j1b2_test ...'
+  ! NOTE(review): despite its name this holds pi**1, whereas sibling providers use pi**1.5 -- confirm.
+  sq_pi_3_2 = dacos(-1.D0)**(1.d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          do i_fit = 1, ng_fit_jast
+    
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+            ! Gaussian product rule for exponents (beta, expo_fit) centred at (B_center, r):
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+            ! combined exponent alpha_1s, combined centre centr_1s, prefactor exp(-expo_coef_1s).
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+            ! NOTE(review): 1.d+9 is a fixed argument of the helper; its meaning is not visible here.
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i, for each component.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! For every grid point r1 (third index) and AO pair (j,i), screened evaluation of
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! summed over the ng_fit_jast fit Gaussians and the List_comb_thr_b3_* terms of the pair.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+  ! NOTE(review): j12_mu_r12, sigma_ij and dist_ij_ipoint are declared but never used below.
+  print*, ' providing int2_u_grad1u_j1b2_test ...'
+  ! pi^(3/2): prefactor of the screening estimate used inside the i_fit loop
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+  ! NOTE(review): ao_prod_dist_grid, ao_prod_sigma and ao_prod_center are SHARED but unused in the loop.
+  int2_u_grad1u_j1b2_test = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+            ! Gaussian product rule for exponents (beta, expo_fit) centred at (B_center, r).
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+            ! Two further cut-offs: exponent larger than 20, then |coef_tmp| below 1d-08.
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
new file mode 100644
index 00000000..8196614f
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -0,0 +1,420 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! For every grid point r1 (third index) and AO pair (j,i), unscreened evaluation of
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  ! summed over the ng_fit_jast fit Gaussians and the List_all_comb_b3_* terms.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_grad1u2_grad2u2_j1b2 = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_1_erf_x_2(i_fit)
+          coef_fit = coef_gauss_1_erf_x_2(i_fit)
+
+          ! Contribution of the fit Gaussian alone (no B_center/beta passed to the helper).
+
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += -0.25d0 * coef_fit * int_fit
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
+
+          ! Terms 2..List_all_comb_b3_size; NOTE(review): entry 1 is presumably the term above -- confirm.
+
+          do i_1s = 2, List_all_comb_b3_size
+
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += -0.25d0 * coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! For every grid point r1 (third index) and AO pair (j,i), unscreened evaluation of
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  ! summed over the ng_fit_jast fit Gaussians and the List_all_comb_b3_* terms.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_u2_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u2_j1b2 = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_x_2(i_fit)
+          coef_fit = coef_gauss_j_mu_x_2(i_fit)
+
+          ! Contribution of the fit Gaussian alone (no B_center/beta passed to the helper).
+
+          int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
+          tmp += coef_fit * int_fit
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
+
+          ! Terms 2..List_all_comb_b3_size; NOTE(review): entry 1 is presumably the term above -- confirm.
+
+          do i_1s = 2, List_all_comb_b3_size
+         
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! For every grid point r1 and AO pair (j,i), the three components (last index) of
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  ! summed, without screening, over the ng_fit_jast fit Gaussians and the List_all_comb_b3_* terms.
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+
+  print*, ' providing int2_u_grad1u_x_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u_grad1u_x_j1b2 = 0.d0
+  ! Parallel over grid points; only the j >= i entries are computed in this region.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
+ !$OMP          tmp_x, tmp_y, tmp_z)                                & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+          coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+          ! Term where the fit Gaussian alone (exponent expo_fit, centre r) is passed as the 1s factor.
+
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit)
+          tmp_x += coef_fit * int_fit(1)
+          tmp_y += coef_fit * int_fit(2)
+          tmp_z += coef_fit * int_fit(3)
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+
+          ! Terms 2..List_all_comb_b3_size; NOTE(review): entry 1 is presumably the term above -- confirm.
+
+          do i_1s = 2, List_all_comb_b3_size
+          
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
+            ! Gaussian product rule for exponents (beta, expo_fit) centred at (B_center, r).
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
+            
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy the computed (i,j) entries into the (j,i) entries with j < i, for each component.
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int2_u_grad1u_j1b2(j,i,ipoint), with r1 = final_grid_points(:,ipoint), symmetric in (i,j):
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! Accumulated over the ng_fit_jast fit Gaussians and the List_all_comb_b3 1s Gaussians.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+
+  print*, ' providing int2_u_grad1u_j1b2 ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  int2_u_grad1u_j1b2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
+ !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        tmp = 0.d0
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+          coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+          ! first term: fit Gaussian alone, centered at r (the i_1s loop below starts at 2)
+
+          int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
+
+          tmp += coef_fit * int_fit
+
+          ! remaining terms: 1s Gaussian (beta, B_center) merged with the fit Gaussian (expo_fit, r)
+
+          do i_1s = 2, List_all_comb_b3_size
+
+            coef        = List_all_comb_b3_coef  (i_1s)
+            beta        = List_all_comb_b3_expo  (i_1s)
+            B_center(1) = List_all_comb_b3_cent(1,i_1s)
+            B_center(2) = List_all_comb_b3_cent(2,i_1s)
+            B_center(3) = List_all_comb_b3_cent(3,i_1s)
+            dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                        + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                        + (B_center(3) - r(3)) * (B_center(3) - r(3))
+            ! Gaussian product: summed exponent and exponent-weighted center
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s 
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+            ! prefactor exp(-beta*expo_fit/(beta+expo_fit) * |B_center - r|^2); negligible terms are skipped
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 80.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_j1b2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! only j >= i was computed above: copy those values into the other triangle
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2(j,i,ipoint) = int2_u_grad1u_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
new file mode 100644
index 00000000..21927371
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
@@ -0,0 +1,453 @@
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid
+!  double precision              :: r(3), expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3)
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:)
+!  double precision, allocatable :: r_mask_grid(:,:)
+!  double precision, allocatable :: int_fit_v(:)
+!
+!  print*, ' providing int2_grad1u2_grad2u2_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                     &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+! !$OMP          coef_fit, expo_fit, int_fit_v, n_mask_grid,        &
+! !$OMP          i_mask_grid, r_mask_grid)                          &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,&
+! !$OMP          final_grid_points_transp, n_max_fit_slat,          &
+! !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,        &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,      &
+! !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,  &
+! !$OMP          ao_overlap_abs)
+!
+!  allocate(int_fit_v(n_points_final_grid))
+!  allocate(n_mask_grid(n_points_final_grid))
+!  allocate(r_mask_grid(n_points_final_grid,3))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!
+!      if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+!        cycle
+!      endif
+!
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_1_erf_x_2(i_fit)
+!        coef_fit = coef_gauss_1_erf_x_2(i_fit) * (-0.25d0)
+!
+!        ! ---
+!
+!        call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid = 0    ! dim
+!        n_mask_grid = 0    ! ind
+!        r_mask_grid = 0.d0 ! val
+!        do ipoint = 1, n_points_final_grid
+!
+!          int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+!
+!          if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then
+!            i_mask_grid += 1
+!            n_mask_grid(i_mask_grid  ) = ipoint
+!            r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!        enddo
+!
+!        if(i_mask_grid .eq. 0) cycle
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid
+!            int2_grad1u2_grad2u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
+!
+!END_PROVIDER
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
+!
+!  BEGIN_DOC
+!  !
+!  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid
+!  double precision              :: r(3), expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3), tmp
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:)
+!  double precision, allocatable :: r_mask_grid(:,:)
+!  double precision, allocatable :: int_fit_v(:)
+!
+!  print*, ' providing int2_u2_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_u2_j1b2(:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                      &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+! !$OMP          coef_fit, expo_fit, int_fit_v,                      &
+! !$OMP          i_mask_grid, n_mask_grid, r_mask_grid )             &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
+! !$OMP          final_grid_points_transp, n_max_fit_slat,           &
+! !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
+! !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
+!
+!  allocate(n_mask_grid(n_points_final_grid))
+!  allocate(r_mask_grid(n_points_final_grid,3))
+!  allocate(int_fit_v(n_points_final_grid))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_j_mu_x_2(i_fit)
+!        coef_fit = coef_gauss_j_mu_x_2(i_fit)
+!
+!        ! ---
+!
+!        call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid = 0    ! dim
+!        n_mask_grid = 0    ! ind
+!        r_mask_grid = 0.d0 ! val
+!
+!        do ipoint = 1, n_points_final_grid
+!          int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+!
+!          if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then
+!            i_mask_grid += 1
+!            n_mask_grid(i_mask_grid  ) = ipoint
+!            r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!        enddo
+!
+!        if(i_mask_grid .eq. 0) cycle
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid
+!            int2_u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_u2_j1b2', wall1 - wall0
+!
+!END_PROVIDER
+!
+!! ---
+!
+!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
+!
+!  BEGIN_DOC
+!  !
+!  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+!  !
+!  END_DOC
+!
+!  implicit none
+!
+!  integer                       :: i, j, ipoint, i_1s, i_fit
+!  integer                       :: i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid(3)
+!  double precision              :: x, y, z, expo_fit, coef_fit
+!  double precision              :: coef, beta, B_center(3)
+!  double precision              :: alpha_1s, alpha_1s_inv, expo_coef_1s
+!  double precision              :: wall0, wall1
+!
+!  integer,          allocatable :: n_mask_grid(:,:)
+!  double precision, allocatable :: r_mask_grid(:,:,:)
+!  double precision, allocatable :: int_fit_v(:,:), dist(:,:), centr_1s(:,:,:)
+!
+!  print*, ' providing int2_u_grad1u_x_j1b2'
+!
+!  provide mu_erf final_grid_points_transp j1b_pen
+!  call wall_time(wall0)
+!
+!  int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0
+!
+! !$OMP PARALLEL DEFAULT (NONE)                                          &
+! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, x, y, z, coef, beta,         &
+! !$OMP          coef_fit, expo_fit, int_fit_v, alpha_1s, dist, B_center,&
+! !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s,                   &
+! !$OMP          i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid,  &
+! !$OMP          n_mask_grid, r_mask_grid)                               &
+! !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size,     &
+! !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+! !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,           &
+! !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,           &
+! !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
+!
+!  allocate(dist(n_points_final_grid,3))
+!  allocate(centr_1s(n_points_final_grid,3,3))
+!  allocate(n_mask_grid(n_points_final_grid,3))
+!  allocate(r_mask_grid(n_points_final_grid,3,3))
+!  allocate(int_fit_v(n_points_final_grid,3))
+!
+! !$OMP DO SCHEDULE(dynamic)
+!  do i = 1, ao_num
+!    do j = i, ao_num
+!      do i_fit = 1, n_max_fit_slat
+!
+!        expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+!        coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+!
+!        ! ---
+!
+!        call NAI_pol_x_mult_erf_ao_with1s_v0(i, j, expo_fit, final_grid_points_transp, n_points_final_grid, 1.d+9, final_grid_points_transp, n_points_final_grid, int_fit_v, n_points_final_grid, n_points_final_grid)
+!
+!        i_mask_grid1 = 0    ! dim
+!        i_mask_grid2 = 0    ! dim
+!        i_mask_grid3 = 0    ! dim
+!        n_mask_grid  = 0    ! ind
+!        r_mask_grid  = 0.d0 ! val
+!        do ipoint = 1, n_points_final_grid
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1)
+!
+!          if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then
+!            i_mask_grid1 += 1
+!            n_mask_grid(i_mask_grid1,  1) = ipoint
+!            r_mask_grid(i_mask_grid1,1,1) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid1,2,1) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid1,3,1) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2)
+!
+!          if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then
+!            i_mask_grid2 += 1
+!            n_mask_grid(i_mask_grid2,  2) = ipoint
+!            r_mask_grid(i_mask_grid2,1,2) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid2,2,2) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid2,3,2) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3)
+!
+!          if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then
+!            i_mask_grid3 += 1
+!            n_mask_grid(i_mask_grid3,  3) = ipoint
+!            r_mask_grid(i_mask_grid3,1,3) = final_grid_points_transp(ipoint,1)
+!            r_mask_grid(i_mask_grid3,2,3) = final_grid_points_transp(ipoint,2)
+!            r_mask_grid(i_mask_grid3,3,3) = final_grid_points_transp(ipoint,3)
+!          endif
+!
+!          ! ---
+!
+!        enddo
+!
+!        if((i_mask_grid1+i_mask_grid2+i_mask_grid3) .eq. 0) cycle
+!
+!        i_mask_grid(1) = i_mask_grid1
+!        i_mask_grid(2) = i_mask_grid2
+!        i_mask_grid(3) = i_mask_grid3
+!
+!        ! ---
+!
+!        do i_1s = 2, List_all_comb_b3_size
+!
+!          coef        = List_all_comb_b3_coef  (i_1s) * coef_fit
+!          beta        = List_all_comb_b3_expo  (i_1s)
+!          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+!          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+!          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+!
+!          alpha_1s     = beta + expo_fit
+!          alpha_1s_inv = 1.d0 / alpha_1s
+!          expo_coef_1s = beta * expo_fit * alpha_1s_inv 
+!
+!          do ipoint = 1, i_mask_grid1
+!
+!            x = r_mask_grid(ipoint,1,1)
+!            y = r_mask_grid(ipoint,2,1)
+!            z = r_mask_grid(ipoint,3,1)
+!
+!            centr_1s(ipoint,1,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,1) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,1) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,1) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid2
+!
+!            x = r_mask_grid(ipoint,1,2)
+!            y = r_mask_grid(ipoint,2,2)
+!            z = r_mask_grid(ipoint,3,2)
+!
+!            centr_1s(ipoint,1,2) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,2) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,2) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid3
+!
+!            x = r_mask_grid(ipoint,1,3)
+!            y = r_mask_grid(ipoint,2,3)
+!            z = r_mask_grid(ipoint,3,3)
+!
+!            centr_1s(ipoint,1,3) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x)
+!            centr_1s(ipoint,2,3) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y)
+!            centr_1s(ipoint,3,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z)
+!
+!            dist(ipoint,3) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z)
+!          enddo
+!
+!          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid)
+!
+!          do ipoint = 1, i_mask_grid1
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid2
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
+!          enddo
+!
+!          do ipoint = 1, i_mask_grid3
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
+!          enddo
+!
+!        enddo
+!
+!        ! ---
+!
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+!
+!  deallocate(dist)
+!  deallocate(centr_1s)
+!  deallocate(n_mask_grid)
+!  deallocate(r_mask_grid)
+!  deallocate(int_fit_v)
+!
+! !$OMP END PARALLEL
+!
+!  do ipoint = 1, n_points_final_grid
+!    do i = 2, ao_num
+!      do j = 1, i-1
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+!      enddo
+!    enddo
+!  enddo
+!
+!  call wall_time(wall1)
+!  print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0
+!
+!END_PROVIDER
+!
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..a6a55810
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,369 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint), with R = final_grid_points(:,ipoint), symmetric in (i,j):
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1) / |r - R|
+  ! 1s_j1b(r) is summed term by term from the per-pair lists List_comb_thr_b2_*(:,j,i).
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+
+  print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 (= pi**1.5), sigma_ij and dist_ij_ipoint are not used in the computation below.
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    ! only j >= i is computed here; pairs with ao_overlap_abs_grid < 1d-20 keep the initial zero
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! 1s terms with |coef * ao_abs_comb_b2_j1b| < 1d-10 are dropped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! TODO :: cycle on the 1 - erf(mur12)
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+          ! the mu = 1.d+9 value (presumably the Coulomb limit, cf. its name) is subtracted from the mu_erf one
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy the computed triangle (first index >= second) into the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+  ! Layout: (j,i,ipoint,k) with R = final_grid_points(:,ipoint); k = 1,2,3 holds tmp_x, tmp_y, tmp_z.
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 and expo_erfc_mu_gauss are only referenced by the commented-out screening below.
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)       & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    ! only j >= i is computed here; pairs with ao_overlap_abs_grid < 1d-10 keep the initial zero
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! 1s terms with |coef * ao_abs_comb_b2_j1b| < 1d-10 are dropped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! NOTE(review): disabled screening; center_ij_1s is declared scalar but gaussian_product presumably returns a 3-vector center
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle 
+!          endif
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+          ! per component: coef * (value at mu_erf - value at mu = 1.d+9)
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy the computed triangle (first index >= second) into the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_ij_u_cst_mu_j1b_test(j,i,ipoint), with r1 = final_grid_points(:,ipoint), symmetric in (i,j):
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  ! Summed over the ng_fit_jast Gaussians (expo_gauss_j_mu_x, coef_gauss_j_mu_x) and the per-pair 1s lists.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision           :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3), coeftot
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: dsqpi_3_2,int_j1b
+
+  print*, ' providing v_ij_u_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 (= pi**1.5) is listed in the SHARED clause but not used in the computation below.
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    ! only j >= i is computed here; pairs with ao_overlap_abs_grid < 1d-20 keep the initial zero
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! 1s terms with |coef * ao_abs_comb_b2_j1b| < 1d-10 are dropped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            ! screen on factor_ij_1s_u (presumably the prefactor of the 1s x fit-Gaussian product) before integrating
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy the computed triangle (first index >= second) into the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint), with r1 = final_grid_points(:,ipoint), symmetric in (i,j):
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  ! NOTE(review): the code uses exponent expo_good_j_mu_1gauss and coef_fit = 1.d0; confirm where the prefactor is applied.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision           :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3), coeftot
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    ! only j >= i is computed here; pairs with ao_overlap_abs_grid < 1d-20 keep the initial zero
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! 1s terms with |coef * ao_abs_comb_b2_j1b| < 1d-10 are dropped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          ! single-Gaussian model: no loop over fit functions, so every cycle below skips to the next 1s term
+
+          expo_fit = expo_good_j_mu_1gauss
+          coef_fit = 1.d0
+          coeftot = coef * coef_fit
+          if(dabs(coeftot).lt.1.d-15)cycle
+          ! screen on factor_ij_1s_u (presumably the prefactor of the 1s x fit-Gaussian product) before integrating
+          call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+          if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+          int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+          tmp += coef * coef_fit * int_fit
+
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy the computed triangle (first index >= second) into the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
new file mode 100644
index 00000000..fc30cd83
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -0,0 +1,300 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! v_ij_erf_rk_cst_mu_j1b(j,i,R) = int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! with mu = mu_erf and R = final_grid_points(1:3,ipoint).
+  !
+  ! For every grid point and AO pair the value is assembled as
+  !
+  !   sum_k List_all_comb_b2_coef(k) * ( I_k(mu_erf) - I_k(1.d+9) )
+  !
+  ! where I_k(mu) = NAI_pol_mult_erf_ao_with1s(i, j, expo_k, cent_k, mu, R) and
+  ! (expo_k, cent_k) are taken from List_all_comb_b2_expo / List_all_comb_b2_cent.
+  ! The mu = 1.d+9 evaluation is presumably the numerical Coulomb limit.
+  !
+  ! Only the j >= i triangle is integrated; the rest is filled by symmetry.
+  !
+  ! The wall time spent in this provider is printed.
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j, i_1s
+  double precision           :: r(3), B_center(3)
+  double precision           :: coef, beta
+  double precision           :: int_mu, int_coulomb
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+
+  print *, ' providing v_ij_erf_rk_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  v_ij_erf_rk_cst_mu_j1b = 0.d0
+
+  ! The grid points are distributed over the OpenMP threads; each (j,i,ipoint)
+  ! element is written inside the iteration of its own grid point.
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, &
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,   &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b, mu_erf)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        ! terms are accumulated in increasing order of i_1s, starting from
+        ! the first entry of the list
+        tmp = 0.d0
+        do i_1s = 1, List_all_comb_b2_size
+
+          coef          = List_all_comb_b2_coef(i_1s)
+          beta          = List_all_comb_b2_expo(i_1s)
+          B_center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+
+          ! same integral at the requested mu and at a very large mu; only
+          ! their difference enters the result
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp = tmp + coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (first AO index >= second) onto the other
+  ! one (first AO index < second)
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! x_v_ij_erf_rk_cst_mu_j1b(j,i,R,m) = int dr x_m phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! with mu = mu_erf, R = final_grid_points(1:3,ipoint) and m = 1, 2, 3 the
+  ! component returned by NAI_pol_x_mult_erf_ao_with1s (presumably x, y, z).
+  !
+  ! For every grid point and AO pair the value is assembled as
+  !
+  !   sum_k List_all_comb_b2_coef(k) * ( I_k(mu_erf) - I_k(1.d+9) )
+  !
+  ! where I_k(mu) is the 3-vector filled by
+  ! NAI_pol_x_mult_erf_ao_with1s(i, j, expo_k, cent_k, mu, R, ints) and
+  ! (expo_k, cent_k) are taken from List_all_comb_b2_expo / List_all_comb_b2_cent.
+  !
+  ! Only the j >= i triangle is integrated; the rest is filled by symmetry.
+  !
+  ! The wall time spent in this provider is printed.
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, i_1s
+  double precision :: r(3), B_center(3)
+  double precision :: coef, beta
+  double precision :: ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_j1b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      &
+ !$OMP          tmp_x, tmp_y, tmp_z)                                                  &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b, mu_erf)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+
+        ! terms are accumulated in increasing order of i_1s, starting from
+        ! the first entry of the list
+        do i_1s = 1, List_all_comb_b2_size
+
+          coef          = List_all_comb_b2_coef(i_1s)
+          beta          = List_all_comb_b2_expo(i_1s)
+          B_center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+
+          ! same integrals at the requested mu and at a very large mu; only
+          ! their difference enters the result
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x = tmp_x + coef * (ints(1) - ints_coulomb(1))
+          tmp_y = tmp_y + coef * (ints(2) - ints_coulomb(2))
+          tmp_z = tmp_z + coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (first AO index >= second) onto the other
+  ! one (first AO index < second), component by component
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! v_ij_u_cst_mu_j1b(j,i,r1) = int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  !
+  ! with r1 = final_grid_points(1:3,ipoint).
+  !
+  ! The value is assembled as a double sum over the ng_fit_jast fitted
+  ! Gaussians (coef_gauss_j_mu_x, expo_gauss_j_mu_x) and over the entries k of
+  ! List_all_comb_b2_coef / List_all_comb_b2_expo / List_all_comb_b2_cent :
+  !
+  !   sum_fit sum_k coef_k * coef_fit * overlap_gauss_r12_ao_with1s(cent_k, expo_k, r1, expo_fit, i, j)
+  !
+  ! u(mu, r12) is presumably the function represented by the fitted Gaussians (TODO confirm).
+  !
+  ! Only the j >= i triangle is integrated; the rest is filled by symmetry.
+  !
+  ! The wall time spent in this provider is printed.
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j, i_1s, i_fit
+  double precision           :: r(3), B_center(3)
+  double precision           :: coef, beta
+  double precision           :: coef_fit, expo_fit, int_fit
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing v_ij_u_cst_mu_j1b ...'
+  call wall_time(wall0)
+
+  provide mu_erf final_grid_points j1b_pen
+
+  v_ij_u_cst_mu_j1b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp)                   &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo,       &
+ !$OMP          List_all_comb_b2_cent, v_ij_u_cst_mu_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp = 0.d0
+
+        ! outer sum: Gaussians of the fit
+        do i_fit = 1, ng_fit_jast
+
+          expo_fit = expo_gauss_j_mu_x(i_fit)
+          coef_fit = coef_gauss_j_mu_x(i_fit)
+
+          ! inner sum: entries of the list, accumulated in increasing order
+          ! of i_1s starting from the first one
+          do i_1s = 1, List_all_comb_b2_size
+
+            coef          = List_all_comb_b2_coef(i_1s)
+            beta          = List_all_comb_b2_expo(i_1s)
+            B_center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+
+            int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp = tmp + coef * coef_fit * int_fit
+          enddo
+
+        enddo
+
+        v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (first AO index >= second) onto the other
+  ! one (first AO index < second)
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b(j,i,ipoint) = v_ij_u_cst_mu_j1b(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f
new file mode 100644
index 00000000..8624e7b8
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f
@@ -0,0 +1,437 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! v_ij_erf_rk_cst_mu(j,i,R) = int dr phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! with mu = mu_erf and R = final_grid_points(1:3,ipoint); each element is the
+  ! difference NAI_pol_mult_erf_ao(i, j, mu_erf, R) - NAI_pol_mult_erf_ao(i, j, 1.d+9, R).
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao
+
+  print*, ' providing v_ij_erf_rk_cst_mu ...'
+
+  provide mu_erf final_grid_points 
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu = 0.d0
+
+ !$OMP PARALLEL                                       &
+ !$OMP DEFAULT (NONE)                                 &
+ !$OMP PRIVATE (i, j, ipoint, r, int_mu, int_coulomb) &
+ !$OMP SHARED  (ao_num, n_points_final_grid, v_ij_erf_rk_cst_mu, final_grid_points, mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    ! only the j >= i triangle is integrated
+    do i = 1, ao_num
+      do j = i, ao_num
+        int_mu      = NAI_pol_mult_erf_ao(i, j, mu_erf, r)
+        int_coulomb = NAI_pol_mult_erf_ao(i, j,  1.d+9, r)
+
+        v_ij_erf_rk_cst_mu(j,i,ipoint) = int_mu - int_coulomb
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (first AO index >= second) onto the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu(j,i,ipoint) = v_ij_erf_rk_cst_mu(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! v_ij_erf_rk_cst_mu_transp(R,j,i) = int dr phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  ! with mu = mu_erf and R = final_grid_points(1:3,ipoint); the grid index comes first.
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao
+
+  print *, ' providing v_ij_erf_rk_cst_mu_transp ...'
+
+  provide mu_erf final_grid_points 
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                  &
+ !$OMP DEFAULT (NONE)                            &
+ !$OMP PRIVATE (i,j,ipoint,r,int_mu,int_coulomb) &
+ !$OMP SHARED (ao_num,n_points_final_grid,v_ij_erf_rk_cst_mu_transp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    ! only the j >= i triangle is integrated
+    do i = 1, ao_num
+      do j = i, ao_num
+        int_mu      = NAI_pol_mult_erf_ao(i, j, mu_erf, r)
+        int_coulomb = NAI_pol_mult_erf_ao(i, j,  1.d+9, r)
+
+        v_ij_erf_rk_cst_mu_transp(ipoint,j,i) = int_mu - int_coulomb
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (j >= i) onto the other one
+  do i = 1, ao_num
+    do j = 1, i-1
+      do ipoint = 1, n_points_final_grid
+        v_ij_erf_rk_cst_mu_transp(ipoint,j,i) = v_ij_erf_rk_cst_mu_transp(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! x_v_ij_erf_rk_cst_mu_tmp(m,j,i,R) = int dr x_m phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! with mu = mu_erf, R = final_grid_points(1:3,ipoint) and m = 1, 2, 3 the component
+  ! filled by NAI_pol_x_mult_erf_ao (presumably x, y, z). The component index comes first.
+  ! The j < i triangle is filled by symmetry.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: r(3), ints(3), ints_coulomb(3)
+  double precision :: wall0, wall1
+
+  print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) &
+ !$OMP SHARED (ao_num,n_points_final_grid,x_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    ! only the j >= i triangle is integrated
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        call NAI_pol_x_mult_erf_ao(i, j, mu_erf, r, ints        )
+        call NAI_pol_x_mult_erf_ao(i, j, 1.d+9 , r, ints_coulomb)
+
+        x_v_ij_erf_rk_cst_mu_tmp(1:3,j,i,ipoint) = ints(1:3) - ints_coulomb(1:3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed triangle (j >= i) onto the other one
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_tmp(1:3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(1:3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! x_v_ij_erf_rk_cst_mu(j,i,R,m) = int dr x_m phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! Plain copy of x_v_ij_erf_rk_cst_mu_tmp(m,j,i,R) with the component index m moved last.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        x_v_ij_erf_rk_cst_mu(j,i,ipoint,1:3) = x_v_ij_erf_rk_cst_mu_tmp(1:3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,3,n_points_final_grid)]
+
+  BEGIN_DOC
+  ! x_v_ij_erf_rk_cst_mu_transp(j,i,m,R) = int dr x_m phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! Plain copy of x_v_ij_erf_rk_cst_mu_tmp(m,j,i,R) with the component index m moved after the AO pair.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...'
+
+  call wall_time(wall0)
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        x_v_ij_erf_rk_cst_mu_transp(j,i,1:3,ipoint) = x_v_ij_erf_rk_cst_mu_tmp(1:3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)]
+
+  BEGIN_DOC
+  ! x_v_ij_erf_rk_cst_mu_transp_bis(R,j,i,m) = int dr x_m phi_i(r) phi_j(r) (erf(mu |r - R|) - 1) / |r - R|
+  !
+  ! Plain copy of x_v_ij_erf_rk_cst_mu_tmp(m,j,i,R) with the grid index first and the component index m last.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: wall0, wall1
+
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...'
+
+  call wall_time(wall0)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,j,i,1:3) = x_v_ij_erf_rk_cst_mu_tmp(1:3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! d_dx_v_ij_erf_rk_cst_mu_tmp(m,R,j,i) = int dr phi_j(r) (erf(mu |r - R|) - 1)/|r - R| d/dx_m phi_i(r)
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  ! mu = mu_erf, R = final_grid_points(1:3,ipoint); all (j,i) pairs are computed explicitly.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: r(3), ints(3), ints_coulomb(3)
+  double precision :: wall0, wall1
+
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+  ! the grid points are distributed over the OpenMP threads
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) &
+ !$OMP SHARED (ao_num,n_points_final_grid,d_dx_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        ! same integrals at mu_erf and at a very large mu; their difference is stored
+        call phi_j_erf_mu_r_dxyz_phi(j, i, mu_erf, r, ints)
+        call phi_j_erf_mu_r_dxyz_phi(j, i,  1.d+9, r, ints_coulomb)
+
+        d_dx_v_ij_erf_rk_cst_mu_tmp(1:3,ipoint,j,i) = ints(1:3) - ints_coulomb(1:3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid, ao_num, ao_num, 3)]
+
+  BEGIN_DOC
+  !
+  ! d_dx_v_ij_erf_rk_cst_mu(R,j,i,m) = int dr phi_j(r) (erf(mu |r - R|) - 1)/|r - R| d/dx_m phi_i(r)
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  ! Plain copy of d_dx_v_ij_erf_rk_cst_mu_tmp(m,R,j,i) with the component index m moved last.
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: wall0, wall1
+
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,1:3) = d_dx_v_ij_erf_rk_cst_mu_tmp(1:3,ipoint,j,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_final_grid, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! x_d_dx_v_ij_erf_rk_cst_mu_tmp(m,R,j,i) = int dr x_m phi_j(r) (erf(mu |r - R|) - 1)/|r - R| d/dx_m phi_i(r)
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  ! mu = mu_erf, R = final_grid_points(1:3,ipoint); all (j,i) pairs are computed explicitly.
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: r(3), ints(3), ints_coulomb(3)
+  double precision :: wall0, wall1
+
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
+
+  ! the grid points are distributed over the OpenMP threads
+ !$OMP PARALLEL                                 &
+ !$OMP DEFAULT (NONE)                           &
+ !$OMP PRIVATE (i,j,ipoint,r,ints,ints_coulomb) &
+ !$OMP SHARED (ao_num,n_points_final_grid,x_d_dx_v_ij_erf_rk_cst_mu_tmp,final_grid_points,mu_erf)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        ! same integrals at mu_erf and at a very large mu; their difference is stored
+        call phi_j_erf_mu_r_xyz_dxyz_phi(j, i, mu_erf, r, ints)
+        call phi_j_erf_mu_r_xyz_dxyz_phi(j, i,  1.d+9, r, ints_coulomb)
+
+        x_d_dx_v_ij_erf_rk_cst_mu_tmp(1:3,ipoint,j,i) = ints(1:3) - ints_coulomb(1:3)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid,ao_num, ao_num,3)]
+
+  BEGIN_DOC
+  !
+  ! x_d_dx_v_ij_erf_rk_cst_mu(R,j,i,m) = int dr x_m phi_j(r) (erf(mu |r - R|) - 1)/|r - R| d/dx_m phi_i(r)
+  !
+  ! with m == 1 -> d/dx , m == 2 -> d/dy , m == 3 -> d/dz
+  !
+  ! Plain copy of x_d_dx_v_ij_erf_rk_cst_mu_tmp(m,R,j,i) with the component index m moved last.
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: wall0, wall1
+
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...'
+
+  call wall_time(wall0)
+
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        x_d_dx_v_ij_erf_rk_cst_mu(ipoint,j,i,1:3) = x_d_dx_v_ij_erf_rk_cst_mu_tmp(1:3,ipoint,j,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..d5d88007
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+
+ BEGIN_DOC
+ ! n_pts_grid_ao_prod(j,i) : number of points r of the final grid kept for the AO pair (j,i).
+ ! A point is counted when it survives two successive tests:
+ ! 1) fact_gauss * ao_overlap_abs_grid(j,i) >= 1.d-11, where fact_gauss is the prefactor returned
+ !    by gaussian_product for (0.5/ao_prod_sigma(j,i)**2, ao_prod_center(:,j,i)) and
+ !    (expo_good_j_mu_1gauss, r)
+ ! 2) |overlap| >= thr = 1.d-11, where overlap comes from overlap_abs_gauss_r12_ao when both AOs
+ !    have the same ao_nucl and from overlap_gauss_r12_ao otherwise
+ ! max_n_pts_grid_ao_prod  : largest entry of n_pts_grid_ao_prod
+ END_DOC
+
+ implicit none
+ integer          :: i, j, ipoint
+ double precision :: r(3), center_ij(3), center(3)
+ double precision :: sigma, fact_gauss, alpha, overlap, thr
+ double precision :: overlap_abs_gauss_r12_ao, overlap_gauss_r12_ao
+
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+
+ ! Each (j,i) counter is updated only inside iteration i of the parallel loop.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, fact_gauss, alpha, center, sigma, center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, thr, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl)
+ !$OMP DO
+ do i = 1, ao_num
+  do j = 1, ao_num
+   ! pair center and 0.5/sigma**2, both handed to gaussian_product below
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+   do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    ! prescreening on the Gaussian-product prefactor
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ max_n_pts_grid_ao_prod = maxval(n_pts_grid_ao_prod)
+
+END_PROVIDER
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
new file mode 100644
index 00000000..e27bf723
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -0,0 +1,237 @@
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b2_size]
+  ! Number of 0/1 patterns over the nuclei: 2**nucl_num.
+  implicit none
+  ! NOTE(review): evaluated in default-integer arithmetic; presumably overflows for large nucl_num.
+  List_all_comb_b2_size = 2**nucl_num
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)]
+
+  BEGIN_DOC
+  ! List_all_comb_b2(j,k) = bit (j-1) of (k-1): the 2**nucl_num patterns of 0/1 over the nuclei.
+  END_DOC
+
+  implicit none
+  integer :: i, j
+  ! 2**nucl_num and the bit tests below must fit in a signed default integer
+  if(nucl_num .gt. bit_size(i) - 2) then
+    print *, ' nucl_num = ', nucl_num, '> ', bit_size(i) - 2
+    stop
+  endif
+
+  List_all_comb_b2 = 0
+  do i = 0, List_all_comb_b2_size-1
+    do j = 0, nucl_num-1
+      if(btest(i,j)) List_all_comb_b2(j+1,i+1) = 1
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, List_all_comb_b2_coef, (   List_all_comb_b2_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b2_expo, (   List_all_comb_b2_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b2_cent, (3, List_all_comb_b2_size)]
+
+  BEGIN_DOC
+  ! For every 0/1 pattern n(:) = List_all_comb_b2(:,i), with a_j = n(j) * j1b_pen(j) :
+  !
+  ! expo(i)   = sum_j a_j
+  ! cent(:,i) = sum_j a_j * nucl_coord(j,:) / expo(i)
+  ! coef(i)   = (-1)^(sum_j n(j)) * exp( - sum_{k<j} a_j a_k |R_j - R_k|^2 / expo(i) )
+  !
+  ! When expo(i) < 1d-10 the divisions are skipped (center left at 0, exponential argument
+  ! left undivided). Presumably this is the single-Gaussian form of the product of the
+  ! selected nuclear Gaussians exp(-j1b_pen(j) |r - R_j|^2) -- TODO confirm.
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, phase
+  double precision :: tmp_alphaj, tmp_alphak
+  double precision :: tmp_cent_x, tmp_cent_y, tmp_cent_z
+
+  provide j1b_pen
+
+  List_all_comb_b2_coef = 0.d0
+  List_all_comb_b2_expo = 0.d0
+  List_all_comb_b2_cent = 0.d0
+
+  ! --- total exponent and weighted center of every pattern
+  do i = 1, List_all_comb_b2_size
+    tmp_cent_x = 0.d0
+    tmp_cent_y = 0.d0
+    tmp_cent_z = 0.d0
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+      List_all_comb_b2_expo(i) += tmp_alphaj
+      tmp_cent_x               += tmp_alphaj * nucl_coord(j,1)
+      tmp_cent_y               += tmp_alphaj * nucl_coord(j,2)
+      tmp_cent_z               += tmp_alphaj * nucl_coord(j,3)
+    enddo
+    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
+    List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i)
+    List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i)
+    List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i)
+  enddo
+
+  ! --- argument of the exponential: pair sum over the nuclei, divided by the total exponent
+  do i = 1, List_all_comb_b2_size
+    do j = 2, nucl_num, 1
+      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+      do k = 1, j-1, 1
+        tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k)
+        List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
+                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
+                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
+      enddo
+    enddo
+    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
+    List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i)
+  enddo
+
+  ! --- sign (parity of the number of selected nuclei) and exponential
+  do i = 1, List_all_comb_b2_size
+    phase = sum(List_all_comb_b2(1:nucl_num,i))
+    ! integer exponent: standard Fortran prohibits raising a negative real to a real power
+    List_all_comb_b2_coef(i) = (-1.d0)**phase * dexp(-List_all_comb_b2_coef(i))
+  enddo
+
+  print *, ' coeff, expo & cent of list b2'
+  do i = 1, List_all_comb_b2_size
+    print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i)
+    print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i)
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b3_size]
+  ! Number of 0/1/2 patterns over the nuclei: 3**nucl_num.
+  implicit none
+  ! NOTE(review): evaluated in default-integer arithmetic; presumably overflows for large nucl_num.
+  List_all_comb_b3_size = 3**nucl_num
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)]
+
+  BEGIN_DOC
+  ! List_all_comb_b3(j,i) = j-th base-3 digit (least significant first) of (i-1) :
+  ! the 3**nucl_num patterns of 0/1/2 over the nuclei, from all 0 (i = 1) to all 2 (last i).
+  ! NOTE(review): the size check below presumably runs after the array has been dimensioned.
+  END_DOC
+
+  implicit none
+  integer :: i, j, pow3, max_nucl
+
+  ! largest n such that 3**n still fits in a default integer (19 for 32-bit integers)
+  max_nucl = int( dlog(dble(huge(i))) / dlog(3.d0) )
+  if(nucl_num .gt. max_nucl) then
+    print *, ' nucl_num = ', nucl_num, '> ', max_nucl
+    stop
+  endif
+
+  List_all_comb_b3(:,:)                     = 0
+  List_all_comb_b3(:,List_all_comb_b3_size) = 2
+
+  ! first and last columns are set above; digits of i-1 are peeled off one power of 3 at a time
+  do i = 2, List_all_comb_b3_size-1
+    pow3 = 1
+    do j = 1, nucl_num
+      List_all_comb_b3(j,i) = mod((i-1)/pow3, 3)
+      pow3 = 3 * pow3
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, List_all_comb_b3_coef, (   List_all_comb_b3_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b3_expo, (   List_all_comb_b3_size)]
+&BEGIN_PROVIDER [ double precision, List_all_comb_b3_cent, (3, List_all_comb_b3_size)]
+
+  BEGIN_DOC
+  ! For every 0/1/2 pattern n(:) = List_all_comb_b3(:,i), with a_j = n(j) * j1b_pen(j) :
+  !
+  ! expo(i)   = sum_j a_j
+  ! cent(:,i) = sum_j a_j * nucl_coord(j,:) / expo(i)
+  ! coef(i)   = (-1)^(sum_j n(j)) * prod_j 2 / ( n(j)! (2-n(j))! )      (factorials via gamma)
+  !           * exp( - sum_{k<j} a_j a_k |R_j - R_k|^2 / expo(i) )
+  !
+  ! When expo(i) < 1d-10 the divisions are skipped (center left at 0, exponential argument
+  ! left undivided).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, phase
+  double precision :: tmp_alphaj, tmp_alphak, facto
+
+  provide j1b_pen
+
+  List_all_comb_b3_coef = 0.d0
+  List_all_comb_b3_expo = 0.d0
+  List_all_comb_b3_cent = 0.d0
+
+  ! --- total exponent and weighted center of every pattern
+  do i = 1, List_all_comb_b3_size
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
+      List_all_comb_b3_expo(i)   += tmp_alphaj
+      List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
+      List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
+      List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3)
+    enddo
+    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
+    List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i)
+    List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i)
+    List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i)
+  enddo
+
+  ! --- argument of the exponential: pair sum over the nuclei, divided by the total exponent
+  do i = 1, List_all_comb_b3_size
+    do j = 2, nucl_num, 1
+      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
+      do k = 1, j-1, 1
+        tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k)
+        List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
+                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
+                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
+      enddo
+    enddo
+    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
+    List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i)
+  enddo
+
+  ! --- sign, combinatorial factor and exponential
+  do i = 1, List_all_comb_b3_size
+    facto = 1.d0
+    phase = 0
+    do j = 1, nucl_num
+      tmp_alphaj = dble(List_all_comb_b3(j,i))
+      facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj))
+      phase += List_all_comb_b3(j,i)
+    enddo
+    ! integer exponent: standard Fortran prohibits raising a negative real to a real power
+    List_all_comb_b3_coef(i) = (-1.d0)**phase * facto * dexp(-List_all_comb_b3_coef(i))
+  enddo
+
+  print *, ' coeff, expo & cent of list b3'
+  do i = 1, List_all_comb_b3_size
+    print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i)
+    print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i)
+  enddo
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..bf493fbb
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,191 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
+ BEGIN_DOC
+ ! List_comb_thr_b2_size(j,i) : number of terms of the List_all_comb_b2 list retained for the AO pair (j,i).
+ ! A term is retained when  |coef| * \int dr |AO_i(r) AO_j(r)| exp(-beta (r - center)^2) > 1.d-15,
+ ! the integral being a weighted sum over the extra grid; terms with |coef| < 1.d-15 are skipped upfront.
+ !
+ ! max_List_comb_thr_b2_size : largest element of List_comb_thr_b2_size.
+ END_DOC
+ implicit none
+ integer          :: i_1s, i, j, ipoint
+ double precision :: coef, beta, center(3), int_j1b, thr
+ double precision :: r(3), weight, dist
+
+ thr = 1.d-15
+ List_comb_thr_b2_size = 0
+ do i = 1, ao_num
+  do j = i, ao_num ! j >= i only : the other triangle is filled by the symmetric copy below
+   do i_1s = 1, List_all_comb_b2_size
+     coef = List_all_comb_b2_coef(i_1s)
+     if(dabs(coef) .lt. thr) cycle
+     beta        = max(List_all_comb_b2_expo(i_1s), 1.d-12)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b2_size(j,i) += 1
+     endif
+   enddo
+   List_comb_thr_b2_size(i,j) = List_comb_thr_b2_size(j,i) ! symmetric copy (no-op when j == i)
+  enddo
+ enddo
+
+ ! maximum over all AO pairs
+ max_List_comb_thr_b2_size = maxval(List_comb_thr_b2_size)
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b2_j1b = 10000000.d0 ! entries beyond List_comb_thr_b2_size(j,i) keep this large value
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     ! same selection as in List_comb_thr_b2_size (cutoff thr, exponent floor 1.d-12), so that exactly List_comb_thr_b2_size(j,i) terms are stored
+     if(dabs(coef).lt.thr)cycle
+     beta        = List_all_comb_b2_expo  (i_1s) ! stored as is; the floor is only applied inside the screening integral
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-max(beta,1.d-12)*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+ ! copy the j >= i triangle computed above onto the j < i triangle (all four arrays)
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_thr_b2_size(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+     ao_abs_comb_b2_j1b(icount,j,i) = ao_abs_comb_b2_j1b(icount,i,j)
+    enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15 ! cutoff on |coef| and on |coef| * (absolute-value integral)
+ List_comb_thr_b3_size = 0
+ do i = 1, ao_num
+  do j = 1, ao_num ! full (j,i) square : the symmetric copy further down is commented out
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12) ! same floor as the loop filling List_comb_thr_b3_coef/expo/cent, so both select identical terms
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0 ! weighted grid sum of |AO_i AO_j| exp(-beta (r-center)^2)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b3_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
+!  enddo
+! enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
+ enddo
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15 ! cutoff on |coef| and on |coef| * |int_j1b|
+ ao_abs_comb_b3_j1b = 10000000.d0 ! entries that are not overwritten below keep this large value
+ do i = 1, ao_num
+  do j = 1, ao_num ! full (j,i) square : no symmetry is used (see the commented-out copy at the end)
+   icount = 0 ! number of terms stored so far for the pair (j,i)
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12) ! floor on the exponent; the floored value is the one stored in List_comb_thr_b3_expo
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0 ! weighted grid sum of |AO_i(r) AO_j(r)| exp(-beta (r-center)^2)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then ! keep the term : append it to the list of the pair (j,i)
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_thr_b3_size(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
+ 
+END_PROVIDER 
+
diff --git a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
new file mode 100644
index 00000000..641d25fe
--- /dev/null
+++ b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
@@ -0,0 +1,195 @@
+double precision function NAI_pol_mult_erf_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Computes the following integral over R^3 :
+  !
+  ! .. math::
+  ! 
+  !   \int dr  (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | } \exp(-\delta (r - D)^2 ).
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D" 
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+
+ double precision  :: NAI_pol_mult_erf
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),i,lx,ly,lz,iorder_tmp(3)
+ thr = 1.d-10 ! polynomial coefficients (and their partial products) below thr are skipped
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! Loop over the monomials (lx,ly,lz) of the new polynom standing for exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 )
+ accu = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    accu += coefxyz * NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,C_center,n_pt_max_integrals,mu)
+   enddo
+  enddo
+ enddo
+ NAI_pol_mult_erf_gauss_r12 = fact_a_new * accu ! the constant factor is applied once, after the sum
+end
+
+subroutine erfc_mu_gauss_xyz(D_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in,xyz_ints)
+  BEGIN_DOC
+  ! Computes the following integrals :
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-delta (r - D)^2 ) x/y/z * (1 - erf(mu |r-D|))/ |r-D| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  !   xyz_ints(1) = x , xyz_ints(2) = y, xyz_ints(3) = z, xyz_ints(4) = x^0 
+  !
+  ! The center handed to NAI_pol_mult_erf is D_center, and the 1/|r-D| part is evaluated as
+  ! erf(mu_inf |r-D|)/|r-D| with mu_inf = 1.d+10.
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta,mu  ! pure gaussian "D" and mu parameter
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),n_pt_in
+ double precision, intent(out)   :: xyz_ints(4)
+
+ double precision  :: NAI_pol_mult_erf
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: coefx,coefy,coefz,coefxy,coefxyz,thr,contrib,contrib_inf,contrib_erfc,mu_inf
+ integer           :: d(3),lx,ly,lz,iorder_tmp(3),mm
+ integer           :: power_B_tmp(3)
+ mu_inf = 1.d+10 ! very large mu : erf(mu_inf x)/x is used in place of 1/x
+ thr = 1.d-10    ! polynomial coefficients (and their partial products) below thr are skipped
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ xyz_ints = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+     ! erfc integral with the original power_B : it does not depend on mm, so it is evaluated only once
+     contrib     = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,D_center,n_pt_in,mu)
+     contrib_inf = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,D_center,n_pt_in,mu_inf)
+     contrib_erfc = contrib_inf - contrib
+     xyz_ints(4) += contrib_erfc * coefxyz ! usual term with no x/y/z 
+
+     do mm = 1, 3 
+      ! (x phi_i ) * phi_j 
+      ! x * (x - B_x)^b_x = B_x (x - B_x)^b_x + 1 * (x - B_x)^{b_x+1}
+      !
+      ! first contribution :: B_x (x - B_x)^b_x :: usual integral multiplied by B_x
+      xyz_ints(mm) += contrib_erfc * B_center(mm) * coefxyz 
+
+      !
+      ! second contribution :: (x - B_x)^(b_x+1) :: integral with b_x=>b_x+1 
+      power_B_tmp = power_B
+      power_B_tmp(mm) += 1
+      contrib = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu)  
+      contrib_inf = NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B_tmp,alpha_new,beta,D_center,n_pt_in,mu_inf)  
+      xyz_ints(mm) += (contrib_inf -  contrib) * coefxyz     
+     enddo
+   enddo
+  enddo
+ enddo
+ xyz_ints *= fact_a_new 
+end
+
+
+double precision function erf_mu_gauss(D_center,delta,mu,A_center,B_center,power_A,power_B,alpha,beta,n_pt_in)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  ! 
+  !   \int dr exp(-delta (r - D)^2 ) erf(mu*|r-D|)/ |r-D| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! D_center is both the center of the gaussian exp(-delta (r - D)^2 ) and the center handed to NAI_pol_mult_erf.
+  ! n_pt_in is forwarded unchanged to NAI_pol_mult_erf.
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta,mu  ! pure gaussian "D" and mu parameter
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),n_pt_in
+
+ double precision  :: NAI_pol_mult_erf
+ ! Outputs of give_explicit_poly_and_gaussian for the product gaussian "A" x exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! polynom coefficients, one column per direction
+ double precision  :: A_center_new(3)   ! center
+ integer           :: iorder_a_new(3)   ! highest order of the polynom in each direction
+ double precision  :: alpha_new         ! exponent
+ double precision  :: fact_a_new        ! constant prefactor
+ double precision  :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),lx,ly,lz,iorder_tmp(3)
+
+ thr = 1.d-10 ! screening threshold on the polynom coefficients
+ d   = 0      ! exp(-delta (r - D)^2 ) carries no polynom
+
+ ! Merge gaussian "A" with exp(-delta (r - D)^2 )
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! Sum the erf integrals over the monomials (lx,ly,lz) of the merged polynom
+ accu = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy 
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz 
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    accu += coefxyz * NAI_pol_mult_erf(A_center_new,B_center,iorder_tmp,power_B,alpha_new,beta,D_center,n_pt_in,mu)
+   enddo
+  enddo
+ enddo
+ erf_mu_gauss = fact_a_new * accu
+end
+
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
new file mode 100644
index 00000000..54c2d95b
--- /dev/null
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -0,0 +1,340 @@
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100 ! last argument of overlap_gaussian_xyz
+  thr  = 1.d-10 ! screening threshold on fact_a_new and on the (partial) coefficient products
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0 ! value returned by the early exit below
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! Loop over the monomials (lx,ly,lz) of the new polynom standing for exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 )
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new ! the constant factor is folded into the x coefficient, hence into the screening
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      coefy  = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy) .lt. thr) cycle
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        coefz   = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz) .lt. thr) cycle
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
+        accu += coefxyz * overlap ! only the total overlap is used; overlap_x/y/z are ignored
+      enddo
+    enddo
+  enddo
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=50 ! last argument of overlap_x_abs
+  lower_exp_val = 40.d0 ! passed as is to overlap_x_abs
+  thr = 1.d-12 ! screening threshold on fact_a_new and on the coefficient products
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0 ! value returned by the early exit below
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! Loop over the monomials (lx,ly,lz) of the new polynom standing for exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2 )
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z) ! every monomial enters through its absolute value
+      enddo
+    enddo
+  enddo
+  overlap_abs_gauss_r12= accu
+end
+
+!---
+
+! TODO apply Gaussian product three times first
+subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, rvec, LD_rvec, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  !   \int dr exp(-delta (r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2)
+  !   using an array of D_centers
+  !
+  ! n_points: nb of integrals
+  !
+  END_DOC
+
+  implicit none
+
+  include 'constants.include.F'
+
+  integer,          intent(in)  :: LD_D, LD_rvec, n_points ! leading dimensions of D_center and rvec, number of integrals
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: D_center(LD_D,3), delta ! one center D per row
+  double precision, intent(in)  :: A_center(3), B_center(3), alpha, beta 
+  double precision, intent(out) :: rvec(LD_rvec) ! rvec(ipoint) receives the integral for the ipoint-th center
+
+  integer                       :: maxab
+  integer                       :: d(3), i, lx, ly, lz, iorder_tmp(3), ipoint
+  double precision              :: overlap_x, overlap_y, overlap_z
+  double precision              :: alpha_new
+  double precision              :: accu, thr, coefxy
+  integer,          allocatable :: iorder_a_new(:)
+  double precision, allocatable :: overlap(:)
+  double precision, allocatable :: A_new(:,:,:), A_center_new(:,:)
+  double precision, allocatable :: fact_a_new(:)
+
+  thr  = 1.d-10 ! set but not used below : no screening is done in this routine
+  d(:) = 0 ! the gaussian exp(-delta (r - D)^2) carries no polynom
+
+  maxab = maxval(power_A(1:3)) ! sizes the polynom-order dimension of A_new
+
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
+
+  rvec(:) = 0.d0 ! the whole array is zeroed, including entries beyond n_points
+
+  do lx = 0, iorder_a_new(1)
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz_v(A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B, overlap, n_points)
+
+        do ipoint = 1, n_points
+          rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * A_new(ipoint,ly,2) * A_new(ipoint,lz,3) * overlap(ipoint)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  do ipoint = 1, n_points
+    rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) ! per-point constant factor, applied once after the sum
+  enddo
+
+  deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap)
+
+end subroutine overlap_gauss_r12_v
+
+!---
+
+subroutine overlap_gauss_xyz_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, gauss_ints)
+
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   gauss_ints(m) = \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! with m == 1 ==> x, m == 2 ==> y, m == 3 ==> z
+  ! The overlap carrying the unchanged power_B is common to the three components : it is evaluated once per monomial.
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: gauss_ints(3)
+
+ double precision  :: overlap_x,overlap_y,overlap_z,overlap,overlap_0
+ ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! new polynom
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ integer           :: power_B_new(3)
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new        ! constant factor
+ double precision  :: coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),lx,ly,lz,iorder_tmp(3),dim1,m
+ dim1=100
+ thr = 1.d-10 ! polynomial coefficients (and their partial products) below thr are skipped
+ d = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+
+ ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+ gauss_ints = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    ! overlap with the unchanged power_B : same value for m = 1,2,3, so it is computed before the loop over m
+    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap_0,dim1)
+    do m = 1, 3
+     ! change (x-Bx)^bx --> (x-Bx)^(bx+1) + Bx(x-Bx)^bx
+     power_B_new = power_B
+     power_B_new(m) += 1 ! (x-Bx)^(bx+1)
+     call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+     gauss_ints(m) += coefxyz * overlap
+     gauss_ints(m) += coefxyz * overlap_0 * B_center(m) ! Bx (x-Bx)^(bx)
+    enddo
+   enddo
+  enddo
+ enddo
+ gauss_ints *= fact_a_new
+end
+
+double precision function overlap_gauss_xyz_r12_specific(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,mx)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !    \int dr exp(-delta (r - D)^2 ) * x/y/z (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! where mx selects the extra factor : mx == 1 ==> x, mx == 2 ==> y, mx == 3 ==> z
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ double precision, intent(in)    :: D_center(3), delta  ! pure gaussian "D"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3),mx
+
+ double precision  :: overlap_x,overlap_y,overlap_z,overlap
+ ! Outputs of give_explicit_poly_and_gaussian for the product gaussian "A" x exp(-delta (r - D)^2 )
+ double precision  :: A_new(0:max_dim,3)! polynom coefficients, one column per direction
+ double precision  :: A_center_new(3)   ! center
+ integer           :: iorder_a_new(3)   ! highest order of the polynom in each direction
+ integer           :: power_B_new(3)    ! power_B with the mx component raised by one
+ double precision  :: alpha_new         ! exponent
+ double precision  :: fact_a_new        ! constant prefactor
+ double precision  :: coefx,coefy,coefz,coefxy,coefxyz,thr
+ integer           :: d(3),lx,ly,lz,iorder_tmp(3),dim1
+ dim1 = 100
+ thr  = 1.d-10 ! screening threshold on the polynom coefficients
+ d    = 0      ! exp(-delta (r - D)^2 ) carries no polynom
+
+ ! x (x-Bx)^bx = (x-Bx)^(bx+1) + Bx (x-Bx)^bx : powers used by the first term
+ power_B_new = power_B
+ power_B_new(mx) += 1
+
+ ! Merge gaussian "A" with exp(-delta (r - D)^2 )
+ call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , &
+                                      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+ ! Sum over the monomials (lx,ly,lz) of the merged polynom
+ overlap_gauss_xyz_r12_specific = 0.d0
+ do lx = 0, iorder_a_new(1)
+  coefx = A_new(lx,1)
+  if(dabs(coefx).lt.thr)cycle
+  iorder_tmp(1) = lx
+  do ly = 0, iorder_a_new(2)
+   coefy = A_new(ly,2)
+   coefxy = coefx * coefy
+   if(dabs(coefxy).lt.thr)cycle
+   iorder_tmp(2) = ly
+   do lz = 0, iorder_a_new(3)
+    coefz = A_new(lz,3)
+    coefxyz = coefxy * coefz
+    if(dabs(coefxyz).lt.thr)cycle
+    iorder_tmp(3) = lz
+    ! term (x-Bx)^(bx+1)
+    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B_new,overlap_x,overlap_y,overlap_z,overlap,dim1)
+    overlap_gauss_xyz_r12_specific += coefxyz * overlap
+    ! term Bx (x-Bx)^(bx)
+    call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+    overlap_gauss_xyz_r12_specific += coefxyz * overlap * B_center(mx)
+   enddo
+  enddo
+ enddo
+ overlap_gauss_xyz_r12_specific *= fact_a_new
+end
diff --git a/src/ao_many_one_e_ints/stg_gauss_int.irp.f b/src/ao_many_one_e_ints/stg_gauss_int.irp.f
new file mode 100644
index 00000000..384f744b
--- /dev/null
+++ b/src/ao_many_one_e_ints/stg_gauss_int.irp.f
@@ -0,0 +1,121 @@
+double precision function ovlp_stg_gauss_int_phi_ij(D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Overlap-type integral of two polynomial Gaussians "A" and "B" with a Slater x Gaussian factor centered on D :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-gam (r - D)) exp(-delta * (r -D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+ implicit none
+ integer, intent(in)             :: power_A(3),power_B(3)              ! polynomial powers of "A" and "B"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! centers and exponents of "A" and "B"
+ double precision, intent(in)    :: D_center(3), gam                   ! center and exponent of the Slater factor
+ double precision, intent(in)    :: delta                              ! exponent of the Gaussian factor on D
+
+ double precision              :: overlap_gauss_r12
+ double precision, allocatable :: expos_fit(:)
+ double precision              :: expo_k, int_k
+ integer                       :: k
+ allocate(expos_fit(n_max_fit_slat))
+ call expo_fit_slater_gam(gam,expos_fit) ! exponents filled by expo_fit_slater_gam for the Slater exponent gam
+ ovlp_stg_gauss_int_phi_ij = 0.d0
+ do k = 1, n_max_fit_slat
+  expo_k = expos_fit(k) + delta ! k-th exponent merged with the Gaussian factor in delta
+  int_k  = overlap_gauss_r12(D_center,expo_k,A_center,B_center,power_A,power_B,alpha,beta)
+  ovlp_stg_gauss_int_phi_ij += coef_fit_slat_gauss(k) * int_k
+ enddo
+end
+
+
+double precision function erf_mu_stg_gauss_int_phi_ij(D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Integral of two polynomial Gaussians "A" and "B", a Slater x Gaussian factor on D and the erf operator on C :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-gam(r - D)-delta(r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ integer, intent(in)             :: power_A(3),power_B(3)              ! polynomial powers of "A" and "B"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! centers and exponents of "A" and "B"
+ double precision, intent(in)    :: C_center(3),mu                     ! center "C" and "mu" of the erf(mu*x)/x operator
+ double precision, intent(in)    :: D_center(3), gam                   ! center and exponent of the Slater factor
+ double precision, intent(in)    :: delta                              ! exponent of the Gaussian factor on D
+
+ double precision              :: NAI_pol_mult_erf_gauss_r12
+ double precision, allocatable :: expos_fit(:)
+ double precision              :: expo_k, int_k
+ integer                       :: k
+ allocate(expos_fit(n_max_fit_slat))
+ call expo_fit_slater_gam(gam,expos_fit) ! exponents filled by expo_fit_slater_gam for the Slater exponent gam
+ erf_mu_stg_gauss_int_phi_ij = 0.d0
+ do k = 1, n_max_fit_slat
+  expo_k = expos_fit(k) + delta ! k-th exponent merged with the Gaussian factor in delta
+  int_k  = NAI_pol_mult_erf_gauss_r12(D_center,expo_k,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  erf_mu_stg_gauss_int_phi_ij += coef_fit_slat_gauss(k) * int_k
+ enddo
+end
+
+double precision function overlap_stg_gauss(D_center,gam,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Overlap-type integral of two polynomial Gaussians "A" and "B" with a pure Slater factor centered on D :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-gam (r - D)) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  END_DOC
+
+ implicit none
+ integer, intent(in)             :: power_A(3),power_B(3)              ! polynomial powers of "A" and "B"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! centers and exponents of "A" and "B"
+ double precision, intent(in)    :: D_center(3), gam                   ! center and exponent of the Slater factor
+
+ double precision :: overlap_gauss_r12
+ double precision :: expos_fit(n_max_fit_slat),expo_k,int_k
+ integer          :: k
+ call expo_fit_slater_gam(gam,expos_fit) ! exponents filled by expo_fit_slater_gam for the Slater exponent gam
+ overlap_stg_gauss = 0.d0
+ do k = 1, n_max_fit_slat
+  expo_k = expos_fit(k)
+  int_k  = overlap_gauss_r12(D_center,expo_k,A_center,B_center,power_A,power_B,alpha,beta)
+  overlap_stg_gauss += coef_fit_slat_gauss(k) * int_k
+ enddo
+end
+
+double precision function erf_mu_stg_gauss(D_center,gam,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  BEGIN_DOC
+  ! Integral of two polynomial Gaussians "A" and "B", a pure Slater factor on D and the erf operator on C :
+  !
+  ! .. math::
+  !
+  !   \int dr exp(-gam(r - D)) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }
+  !
+  END_DOC
+
+ implicit none
+  include 'constants.include.F'
+ integer, intent(in)             :: power_A(3),power_B(3)              ! polynomial powers of "A" and "B"
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! centers and exponents of "A" and "B"
+ double precision, intent(in)    :: C_center(3),mu                     ! center "C" and "mu" of the erf(mu*x)/x operator
+ double precision, intent(in)    :: D_center(3), gam                   ! center and exponent of the Slater factor
+
+ ! local variables
+ double precision :: NAI_pol_mult_erf_gauss_r12
+ double precision :: expos_fit(n_max_fit_slat),expo_k,int_k
+ integer          :: k
+ call expo_fit_slater_gam(gam,expos_fit) ! exponents filled by expo_fit_slater_gam for the Slater exponent gam
+ erf_mu_stg_gauss = 0.d0
+ do k = 1, n_max_fit_slat
+  expo_k = expos_fit(k)
+  int_k  = NAI_pol_mult_erf_gauss_r12(D_center,expo_k,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  erf_mu_stg_gauss += coef_fit_slat_gauss(k) * int_k
+ enddo
+end
diff --git a/src/ao_many_one_e_ints/taylor_exp.irp.f b/src/ao_many_one_e_ints/taylor_exp.irp.f
new file mode 100644
index 00000000..9857875a
--- /dev/null
+++ b/src/ao_many_one_e_ints/taylor_exp.irp.f
@@ -0,0 +1,101 @@
+double precision function exp_dl(x,n)
+ implicit none
+ double precision, intent(in) :: x ! point at which the truncated series is evaluated
+ integer         , intent(in) :: n ! highest order kept in the series
+ integer :: i
+ exp_dl = 1.d0 ! order-0 term of 1 + sum_{i=1}^{n} fact_inv(i) * x^i (Taylor series of exp(x) if fact_inv(i) = 1/i!)
+ do i = 1, n
+  exp_dl += fact_inv(i) * x**i ! integer exponent : a real exponent is not allowed for a negative x
+ enddo
+end
+
+subroutine exp_dl_rout(x,n, array)
+ implicit none
+ double precision, intent(in) :: x ! point at which the partial sums are evaluated
+ integer         , intent(in) :: n ! highest order
+ double precision, intent(out)::  array(0:n) ! array(k) = 1 + sum_{i=1}^{k} fact_inv(i) * x^i
+ integer :: i
+ double precision :: accu
+ accu = 1.d0
+ array(0) = 1.d0
+ do i = 1, n
+  accu += fact_inv(i) * x**i ! integer exponent : a real exponent is not allowed for a negative x
+  array(i) = accu
+ enddo
+end
+
+subroutine exp_dl_ovlp_stg_phi_ij(zeta,D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,n_taylor,array_ints,integral_taylor,exponent_exp)
+  BEGIN_DOC
+  ! Computes, for i = 0, ..., n_taylor, the integrals
+  !
+  ! .. math::
+  !
+  !   array_ints(i) = \int dr [exp(-gam (r - D)) exp(-delta (r - D)^2)]^i (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !
+  ! and integral_taylor = \sum_i (-zeta * exponent_exp)^i * fact_inv(i) * array_ints(i) (unit weight for i = 0), i.e. the Taylor
+  ! expansion (assuming fact_inv(i) = 1/i!) of the integral with EXP{-zeta * exponent_exp * [exp(-gam (r - D)) exp(-delta (r - D)^2)]}
+  END_DOC
+
+ implicit none
+ double precision, intent(in)    :: zeta             ! prefactor of the argument of the exp(-zeta*x)
+ integer, intent(in)             :: n_taylor         ! order of the Taylor expansion of the exponential
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ double precision, intent(in)    :: exponent_exp     ! extra factor multiplying zeta in the expanded exponential
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: array_ints(0:n_taylor),integral_taylor ! integrals of each order and their weighted sum
+
+ integer :: i,dim1
+ double precision :: delta_exp,gam_exp,ovlp_stg_gauss_int_phi_ij
+ double precision :: overlap_x,overlap_y,overlap_z,overlap
+ dim1=100
+ call overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+ array_ints(0) = overlap ! order 0 : plain overlap of "A" and "B"
+ integral_taylor = array_ints(0)
+ do i = 1, n_taylor
+  delta_exp = dble(i) * delta
+  gam_exp   = dble(i) * gam
+  array_ints(i) = ovlp_stg_gauss_int_phi_ij(D_center,gam_exp,delta_exp,A_center,B_center,power_A,power_B,alpha,beta)
+  integral_taylor += (-zeta*exponent_exp)**i * fact_inv(i) * array_ints(i) ! integer exponent : the base can be negative
+ enddo
+
+end
+
+subroutine exp_dl_erf_stg_phi_ij(zeta,D_center,gam,delta,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu,n_taylor,array_ints,integral_taylor)
+  BEGIN_DOC
+  ! Computes, for i = 0, ..., n_taylor, the integrals
+  !
+  ! .. math::
+  !
+  !   array_ints(i) = \int dr exp(-gam*i (r - D)) exp(-delta*i * (r -D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }
+  !
+  ! and integral_taylor = \sum_i (-zeta)^i * fact_inv(i) * array_ints(i) (unit weight for i = 0), i.e. the Taylor expansion
+  ! (assuming fact_inv(i) = 1/i!) of the same integral with the factor EXP{-zeta * exp(-gam (r - D)) exp(-delta (r - D)^2)}
+  END_DOC
+
+ implicit none
+ integer, intent(in)             :: n_taylor         ! order of the Taylor expansion of the exponential
+ double precision, intent(in)    :: zeta             ! prefactor of the argument of the exp(-zeta*x)
+ double precision, intent(in)    :: D_center(3), gam ! pure Slater "D" in r-r_D
+ double precision, intent(in)    :: delta            ! gaussian        in r-r_D
+ double precision, intent(in)    :: C_center(3),mu      ! coulomb center "C" and "mu" in the erf(mu*x)/x function
+ double precision, intent(in)    :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+ integer, intent(in)             :: power_A(3),power_B(3)
+ double precision, intent(out)   :: array_ints(0:n_taylor),integral_taylor ! integrals of each order and their weighted sum
+
+ integer :: i
+ double precision :: delta_exp,gam_exp,NAI_pol_mult_erf,erf_mu_stg_gauss_int_phi_ij
+ ! order 0 : no Slater/Gaussian factor on D, only the erf operator between "A" and "B"
+
+ array_ints(0) = NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_max_integrals,mu)
+ integral_taylor = array_ints(0)
+ do i = 1, n_taylor
+  delta_exp = dble(i) * delta
+  gam_exp   = dble(i) * gam
+  array_ints(i) = erf_mu_stg_gauss_int_phi_ij(D_center,gam_exp,delta_exp,A_center,B_center,power_A,power_B,alpha,beta,C_center,mu)
+  integral_taylor += (-zeta)**i * fact_inv(i) * array_ints(i) ! integer exponent : the base is negative for zeta > 0
+ enddo
+
+end
diff --git a/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f b/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f
new file mode 100644
index 00000000..eed1c348
--- /dev/null
+++ b/src/ao_many_one_e_ints/xyz_grad_xyz_ao_pol.irp.f
@@ -0,0 +1,343 @@
+ BEGIN_PROVIDER [double precision, coef_xyz_ao, (2,3,ao_num)]
+&BEGIN_PROVIDER [integer, power_xyz_ao, (2,3,ao_num)]
+ implicit none
+ BEGIN_DOC
+! Coefficients and powers of the two monomials that represent x_m * phi_i(r), for m = x, y, z
+!
+! x * (x - A_x)^a_x = A_x (x - A_x)^a_x + 1 * (x - A_x)^{a_x+1}
+ END_DOC
+ integer :: i_ao,m,num_ao
+ double precision :: center_ao(1:3)
+ do i_ao = 1, ao_num
+  num_ao = ao_nucl(i_ao)
+  center_ao(1:3) = nucl_coord(num_ao,1:3)
+  do m = 1, 3
+   ! index 1 : A_m * (x_m - A_m)^a_m    index 2 : 1 * (x_m - A_m)^(a_m + 1)
+   coef_xyz_ao(1,m,i_ao)  = center_ao(m)
+   coef_xyz_ao(2,m,i_ao)  = 1.d0
+   power_xyz_ao(1,m,i_ao) = ao_power(i_ao,m)
+   power_xyz_ao(2,m,i_ao) = ao_power(i_ao,m) + 1
+  enddo
+ enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, ao_coef_ord_grad_transp, (2,3,ao_prim_num_max,ao_num) ]
+&BEGIN_PROVIDER [ integer, power_ord_grad_transp, (2,3,ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! Gradient of an AO expressed with polynomials and coefficients : for each direction m and primitive i,
+  ! index 1 holds a_m * c_i with the power a_m - 1, index 2 holds -2 * c_i * alpha_i with the power a_m + 1
+  !
+  ! WARNING : the power a_m - 1 is negative when a_m = 0
+  ! In that case the corresponding coefficient is set to ZERO
+  END_DOC
+  integer                        :: i_prim, j_ao, m
+  double precision               :: c_minus, c_plus
+  do j_ao=1, ao_num
+    do m = 1, 3
+     power_ord_grad_transp(1,m,j_ao) = ao_power(j_ao,m) - 1
+     power_ord_grad_transp(2,m,j_ao) = ao_power(j_ao,m) + 1
+    enddo
+    do i_prim=1, ao_prim_num_max
+     do m = 1, 3
+       c_minus = ao_coef_normalized_ordered(j_ao,i_prim) * dble(ao_power(j_ao,m)) ! a_x * c_i
+       c_plus  = -2.d0 * ao_coef_normalized_ordered(j_ao,i_prim) * ao_expo_ordered_transp(i_prim,j_ao) ! -2 * c_i * alpha_i
+       ! a monomial with a negative power gets a vanishing coefficient
+       if(power_ord_grad_transp(1,m,j_ao).lt.0) c_minus = 0.d0
+       if(power_ord_grad_transp(2,m,j_ao).lt.0) c_plus  = 0.d0
+       ao_coef_ord_grad_transp(1,m,i_prim,j_ao) = c_minus
+       ao_coef_ord_grad_transp(2,m,i_prim,j_ao) = c_plus
+     enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, ao_coef_ord_xyz_grad_transp, (4,3,ao_prim_num_max,ao_num) ]
+&BEGIN_PROVIDER [ integer, power_ord_xyz_grad_transp, (4,3,ao_num) ]
+  implicit none
+  BEGIN_DOC
+  ! x * d/dx of an AO expressed with polynomials and coefficients : four monomials per direction m,
+  ! with the powers a_m - 1, a_m, a_m + 1 and a_m + 2 of (x_m - A_m)
+  !
+  ! WARNING : the power a_m - 1 is negative when a_m = 0; it is then stored as -1
+  ! and the corresponding coefficient is set to ZERO
+  END_DOC
+  integer                        :: i_prim, j_ao, m, num_ao, kk
+  double precision               :: center_ao(1:3), c_pow, c_gau
+  do j_ao=1, ao_num
+   ! nucleus on which the AO is centered
+   num_ao = ao_nucl(j_ao)
+   center_ao(1:3) = nucl_coord(num_ao,1:3)
+   ! powers a_m + kk - 2 for kk = 1, ..., 4 ; any negative power is replaced by -1
+   do m = 1, 3
+     do kk = 1, 4
+      power_ord_xyz_grad_transp(kk,m,j_ao) = max(ao_power(j_ao,m) + kk - 2, -1)
+     enddo
+   enddo
+   do i_prim=1, ao_prim_num_max
+    ! -2 * c_i * alpha_i : prefactor brought by the derivative of the Gaussian
+    c_gau = -2.d0 * ao_coef_normalized_ordered(j_ao,i_prim) * ao_expo_ordered_transp(i_prim,j_ao)
+    do m = 1, 3
+     ! a_m * c_i : prefactor brought by the derivative of the polynomial
+     c_pow = dble(ao_power(j_ao,m)) * ao_coef_normalized_ordered(j_ao,i_prim)
+     ! x_m = A_m + (x_m - A_m) : each derivative term is split in two monomials
+     ao_coef_ord_xyz_grad_transp(1,m,i_prim,j_ao) = c_pow * center_ao(m)
+     ao_coef_ord_xyz_grad_transp(2,m,i_prim,j_ao) = c_pow
+     ao_coef_ord_xyz_grad_transp(3,m,i_prim,j_ao) = c_gau * center_ao(m)
+     ao_coef_ord_xyz_grad_transp(4,m,i_prim,j_ao) = c_gau
+     ! coefficients attached to a negative power are zeroed
+     do kk = 1, 4
+      if(power_ord_xyz_grad_transp(kk,m,j_ao).lt.0) ao_coef_ord_xyz_grad_transp(kk,m,i_prim,j_ao) = 0.d0
+     enddo
+    enddo
+   enddo
+  enddo
+  ! both providers are now filled for every AO
+
+END_PROVIDER
+
+subroutine xyz_grad_phi_ao(r,i_ao,xyz_grad_phi)
+ implicit none
+ integer, intent(in) :: i_ao          ! index of the AO
+ double precision, intent(in) :: r(3) ! point of space where the AO is evaluated
+ double precision, intent(out):: xyz_grad_phi(3) ! x * d/dx phi_i, y * d/dy phi_i, z * d/dz phi_i
+ double precision :: center_ao(3),beta,gauss_j
+ double precision :: accu(3,4),dr(3),r2,pol_usual(3)
+ integer :: m,kk,power_ao(3),num_ao,j_prim
+ power_ao(1:3)= ao_power(i_ao,1:3)
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! gaussian part : accu(m,kk) = sum over the primitives of coef(kk,m) * exp(-beta * r2)
+ accu = 0.d0
+ do j_prim =1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(j_prim,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle ! primitives with beta * r2 > 50 are skipped
+   gauss_j = dexp(-beta*r2)        ! evaluated once per primitive
+   do m = 1, 3
+    do kk = 1, 4
+     accu(m,kk) += ao_coef_ord_xyz_grad_transp(kk,m,j_prim,i_ao) * gauss_j
+    enddo
+   enddo
+ enddo
+ ! polynom part in the two directions other than m
+ pol_usual(1) = dr(2)**power_ao(2) * dr(3)**power_ao(3)
+ pol_usual(2) = dr(1)**power_ao(1) * dr(3)**power_ao(3)
+ pol_usual(3) = dr(1)**power_ao(1) * dr(2)**power_ao(2)
+ ! integer exponents everywhere : dr can be negative, for which a real exponent is not allowed
+
+ xyz_grad_phi = 0.d0
+ do m = 1, 3
+  xyz_grad_phi(m) += accu(m,2) * pol_usual(m) * dr(m)**power_ord_xyz_grad_transp(2,m,i_ao)
+  xyz_grad_phi(m) += accu(m,3) * pol_usual(m) * dr(m)**power_ord_xyz_grad_transp(3,m,i_ao)
+  xyz_grad_phi(m) += accu(m,4) * pol_usual(m) * dr(m)**power_ord_xyz_grad_transp(4,m,i_ao)
+  if(power_ord_xyz_grad_transp(1,m,i_ao).lt.0)cycle ! no (x_m - A_m)^(a_m - 1) term when a_m = 0
+  xyz_grad_phi(m) += accu(m,1) * pol_usual(m) * dr(m)**power_ord_xyz_grad_transp(1,m,i_ao)
+ enddo
+end
+
+subroutine grad_phi_ao(r,i_ao,grad_xyz_phi)
+ implicit none
+ integer, intent(in) :: i_ao          ! index of the AO
+ double precision, intent(in) :: r(3) ! point of space where the AO is evaluated
+ double precision, intent(out):: grad_xyz_phi(3) ! d/dx phi_i, d/dy phi_i, d/dz phi_i
+ double precision :: center_ao(3),beta,gauss_j
+ double precision :: accu(3,2),dr(3),r2,pol_usual(3)
+ integer :: m,power_ao(3),num_ao,j_prim
+ power_ao(1:3)= ao_power(i_ao,1:3)
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! gaussian part : accu(m,kk) = sum over the primitives of coef(kk,m) * exp(-beta * r2)
+ accu = 0.d0
+ do j_prim =1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(j_prim,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle ! primitives with beta * r2 > 50 are skipped
+   gauss_j = dexp(-beta*r2)        ! evaluated once per primitive
+   do m = 1, 3
+    accu(m,1) += ao_coef_ord_grad_transp(1,m,j_prim,i_ao) * gauss_j
+    accu(m,2) += ao_coef_ord_grad_transp(2,m,j_prim,i_ao) * gauss_j
+   enddo
+ enddo
+ ! polynom part in the two directions other than m (integer exponents : dr can be negative)
+ pol_usual(1) = dr(2)**power_ao(2) * dr(3)**power_ao(3)
+ pol_usual(2) = dr(1)**power_ao(1) * dr(3)**power_ao(3)
+ pol_usual(3) = dr(1)**power_ao(1) * dr(2)**power_ao(2)
+ do m = 1, 3
+  grad_xyz_phi(m)  = accu(m,2) * pol_usual(m) * dr(m)**power_ord_grad_transp(2,m,i_ao)
+  if(power_ao(m)==0)cycle ! no (x_m - A_m)^(a_m - 1) term when a_m = 0
+  grad_xyz_phi(m) += accu(m,1) * pol_usual(m) * dr(m)**power_ord_grad_transp(1,m,i_ao)
+ enddo
+end
+
+subroutine xyz_phi_ao(r,i_ao,xyz_phi)
+ implicit none
+ integer, intent(in) :: i_ao          ! index of the AO
+ double precision, intent(in) :: r(3) ! point of space where the AO is evaluated
+ double precision, intent(out):: xyz_phi(3) ! x * phi i, y * phi_i, z * phi_i
+ double precision :: center_ao(3),beta
+ double precision :: accu,dr(3),r2,pol_usual(3)
+ integer :: m,power_ao(3),num_ao
+ power_ao(1:3)= ao_power(i_ao,1:3)
+ num_ao = ao_nucl(i_ao)
+ center_ao(1:3) = nucl_coord(num_ao,1:3)
+ dr(1) = (r(1) - center_ao(1))
+ dr(2) = (r(2) - center_ao(2))
+ dr(3) = (r(3) - center_ao(3))
+ r2 = 0.d0
+ do m = 1, 3
+  r2 += dr(m)*dr(m)
+ enddo
+ ! gaussian part : contraction of the primitives, those with beta * r2 > 50 being skipped
+ accu = 0.d0
+ do m=1,ao_prim_num(i_ao)
+   beta = ao_expo_ordered_transp(m,i_ao)
+   if(dabs(beta*r2).gt.50.d0)cycle
+   accu += ao_coef_normalized_ordered_transp(m,i_ao) * dexp(-beta*r2)
+ enddo
+ ! polynom part in the two directions other than m
+ ! (integer exponents : dr can be negative, for which a real exponent is not allowed)
+ pol_usual(1) = dr(2)**power_ao(2) * dr(3)**power_ao(3)
+ pol_usual(2) = dr(1)**power_ao(1) * dr(3)**power_ao(3)
+ pol_usual(3) = dr(1)**power_ao(1) * dr(2)**power_ao(2)
+ do m = 1, 3
+  xyz_phi(m) = accu * pol_usual(m) * dr(m)**power_ao(m) * ( coef_xyz_ao(1,m,i_ao) + coef_xyz_ao(2,m,i_ao) * dr(m) )
+ enddo
+end
+
+
+subroutine test_pol_xyz
+ implicit none
+ ! Debugging routine : for each AO, integrates on the final grid the absolute difference between
+ ! x_m * phi, d/dx_m phi, x_m * d/dx_m phi obtained from give_all_aos_and_grad_at_r and the values
+ ! returned by xyz_phi_ao, grad_phi_ao, xyz_grad_phi_ao. Only the x_m * d/dx_m phi errors are printed.
+ integer :: ipoint,i,m
+ double precision :: r1(3),weight1
+ double precision :: xyz_phi(3),grad_phi(3),xyz_grad_phi(3)
+ double precision :: num_xyz_phi(3),num_grad_phi(3),num_xyz_grad_phi(3)
+ double precision :: accu_xyz_phi(3),accu_grad_phi(3),accu_xyz_grad_phi(3)
+ double precision :: meta_accu_xyz_phi(3),meta_accu_grad_phi(3),meta_accu_xyz_grad_phi(3)
+ double precision, allocatable :: aos_array(:),aos_grad_array(:,:)
+ allocate(aos_array(ao_num),aos_grad_array(3,ao_num))
+ meta_accu_xyz_phi(1:3)      = 0.d0
+ meta_accu_grad_phi(1:3)     = 0.d0
+ meta_accu_xyz_grad_phi(1:3) = 0.d0
+ do i = 1, ao_num
+  accu_xyz_phi(1:3)      = 0.d0
+  accu_grad_phi(1:3)     = 0.d0
+  accu_xyz_grad_phi(1:3) = 0.d0
+  do ipoint = 1, n_points_final_grid
+   r1(1:3) = final_grid_points(1:3,ipoint)
+   weight1 = final_weight_at_r_vector(ipoint)
+   ! reference values, from the generic evaluation of the AOs and of their gradients
+   call give_all_aos_and_grad_at_r(r1,aos_array,aos_grad_array)
+   num_xyz_phi(1:3)      = r1(1:3) *  aos_array(i)
+   num_grad_phi(1:3)     = aos_grad_array(1:3,i)
+   num_xyz_grad_phi(1:3) = r1(1:3) *  aos_grad_array(1:3,i)
+   ! values from the routines under test
+   call xyz_phi_ao(r1,i,xyz_phi)
+   call grad_phi_ao(r1,i,grad_phi)
+   call xyz_grad_phi_ao(r1,i,xyz_grad_phi)
+   ! grid-weighted absolute errors
+   accu_xyz_phi(1:3)      = accu_xyz_phi(1:3)      + weight1 * dabs(num_xyz_phi(1:3)      -  xyz_phi(1:3)     )
+   accu_grad_phi(1:3)     = accu_grad_phi(1:3)     + weight1 * dabs(num_grad_phi(1:3)     -  grad_phi(1:3)    )
+   accu_xyz_grad_phi(1:3) = accu_xyz_grad_phi(1:3) + weight1 * dabs(num_xyz_grad_phi(1:3) -  xyz_grad_phi(1:3))
+  enddo
+  print*,''
+  print*,''
+  print*,'i,',i
+  print*,''
+  do m = 1, 3
+!    print*, 'm, accu_xyz_phi(m)  ' ,m, accu_xyz_phi(m)
+!    print*, 'm, accu_grad_phi(m) ' ,m, accu_grad_phi(m)
+    print*, 'm, accu_xyz_grad_phi' ,m, accu_xyz_grad_phi(m)
+  enddo
+  ! errors summed over the AOs
+  meta_accu_xyz_phi(1:3)      = meta_accu_xyz_phi(1:3)      + dabs(accu_xyz_phi(1:3))
+  meta_accu_grad_phi(1:3)     = meta_accu_grad_phi(1:3)     + dabs(accu_grad_phi(1:3))
+  meta_accu_xyz_grad_phi(1:3) = meta_accu_xyz_grad_phi(1:3) + dabs(accu_xyz_grad_phi(1:3))
+ enddo
+  do m = 1, 3
+!    print*, 'm, meta_accu_xyz_phi(m)  ' ,m, meta_accu_xyz_phi(m)
+!    print*, 'm, meta_accu_grad_phi(m) ' ,m, meta_accu_grad_phi(m)
+    print*, 'm, meta_accu_xyz_grad_phi' ,m, meta_accu_xyz_grad_phi(m)
+  enddo
+
+
+
+end
+
+subroutine test_ints_semi_bis
+ implicit none
+ ! Debugging routine : grid quadrature of x_m * phi_j * d/dx_m phi_i * derf_mu_x(mu_in,|r - C|) versus phi_j_erf_mu_r_xyz_dxyz_phi
+ integer :: ipoint,i,j,m
+ double precision :: r1(3), aos_grad_array_r1(3, ao_num), aos_array_r1(ao_num)
+ double precision :: C_center(3), weight1,mu_in,r12,derf_mu_x,dxyz_ints(3),NAI_pol_mult_erf_ao
+ double precision :: ao_mat(ao_num,ao_num),ao_xmat(3,ao_num,ao_num),accu1, accu2(3)
+ double precision :: accu1relat, accu2relat(3)
+ double precision :: contrib(3), erf_r12
+ mu_in = 0.5d0
+ C_center(1) = 0.25d0
+ C_center(2) = -1.d0
+ C_center(3) = 1.12d0
+ ao_mat = 0.d0
+ ao_xmat = 0.d0
+ do ipoint = 1, n_points_final_grid
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+  call give_all_aos_and_grad_at_r(r1,aos_array_r1,aos_grad_array_r1)
+  weight1 = final_weight_at_r_vector(ipoint)
+  r12 = dsqrt( (r1(1) - C_center(1))**2 + (r1(2) - C_center(2))**2 + (r1(3) - C_center(3))**2 ) ! integer powers : the differences can be negative
+  erf_r12 = derf_mu_x(mu_in,r12) ! depends on the grid point only : evaluated once, outside the AO loops
+  do i = 1, ao_num
+   do j = 1, ao_num
+    ao_mat(j,i)  += aos_array_r1(i) * aos_array_r1(j) * weight1 * erf_r12
+    do m = 1, 3
+     ao_xmat(m,j,i) += r1(m) * aos_array_r1(j) * aos_grad_array_r1(m,i) * weight1 * erf_r12
+    enddo
+   enddo
+  enddo
+ enddo
+ ! comparison of the grid quadrature ao_xmat with the integrals returned by phi_j_erf_mu_r_xyz_dxyz_phi
+ accu1 = 0.d0
+ accu2 = 0.d0
+ accu1relat = 0.d0
+ accu2relat = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   call phi_j_erf_mu_r_xyz_dxyz_phi(i,j,mu_in, C_center, dxyz_ints)
+   print*,''
+   print*,'i,j',i,j
+   print*,dxyz_ints(:)
+   print*,ao_xmat(:,j,i)
+   do m = 1, 3
+    contrib(m) = dabs(ao_xmat(m,j,i) - dxyz_ints(m))
+    accu2(m) += contrib(m)
+    if(dabs(ao_xmat(m,j,i)).gt.1.d-10)then
+     accu2relat(m) += dabs(ao_xmat(m,j,i) - dxyz_ints(m))/dabs(ao_xmat(m,j,i))
+    endif
+   enddo
+    print*,contrib
+  enddo
+   print*,''
+ enddo
+ print*,'accu2relat = '
+ print*, accu2relat /dble(ao_num * ao_num)
+
+end
+
+
diff --git a/src/bi_ortho_mos/EZFIO.cfg b/src/bi_ortho_mos/EZFIO.cfg
new file mode 100644
index 00000000..9b06a655
--- /dev/null
+++ b/src/bi_ortho_mos/EZFIO.cfg
@@ -0,0 +1,11 @@
+[mo_r_coef]
+type: double precision
+doc: right-coefficient of the i-th |AO| on the j-th |MO|
+interface: ezfio
+size: (ao_basis.ao_num,mo_basis.mo_num)
+
+[mo_l_coef]
+type: double precision
+doc: left-coefficient of the i-th |AO| on the j-th |MO|
+interface: ezfio
+size: (ao_basis.ao_num,mo_basis.mo_num)
diff --git a/src/bi_ortho_mos/NEED b/src/bi_ortho_mos/NEED
new file mode 100644
index 00000000..2a2196e5
--- /dev/null
+++ b/src/bi_ortho_mos/NEED
@@ -0,0 +1,3 @@
+mo_basis
+becke_numerical_grid
+dft_utils_in_r
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
new file mode 100644
index 00000000..2dad9485
--- /dev/null
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -0,0 +1,70 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis.
+  !
+  ! Alpha density matrix of the pair of bi-orthonormal Slater determinants |Chi_0> and |Phi_0> (analogue of the HF alpha density)
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ! dm(i,j) = sum_{k = 1, elec_alpha_num} mo_l_coef(i,k) * mo_r_coef(j,k)
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0,                &
+              mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1),  &
+              0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis.
+  !
+  ! Beta density matrix of the pair of bi-orthonormal Slater determinants |Chi_0> and |Phi_0> (analogue of the HF beta density)
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ! dm(i,j) = sum_{k = 1, elec_beta_num} mo_l_coef(i,k) * mo_r_coef(j,k)
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0,                 &
+              mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1),  &
+              0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis.
+  !
+  ! Total (alpha + beta) density matrix of the pair of bi-orthonormal Slater determinants |Chi_0> and |Phi_0>
+  END_DOC
+
+  implicit none
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1))
+
+  if(elec_alpha_num /= elec_beta_num) then
+    ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
+    TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta
+  else
+    ! same number of alpha and beta electrons : the alpha matrix is counted twice and the beta one is not referenced
+    TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha
+  endif
+END_PROVIDER
+
+! ---
+
diff --git a/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
new file mode 100644
index 00000000..42130575
--- /dev/null
+++ b/src/bi_ortho_mos/bi_ort_mos_in_r.irp.f
@@ -0,0 +1,137 @@
+
+! TODO: left & right MO without duplicate AO calculation
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_r_in_r_array, (mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! mos_r_in_r_array(i,j) = value of the ith RIGHT mo on the jth grid point
+  END_DOC
+
+  implicit none
+  integer          :: i, j                    ! i : grid point, j : MO
+  double precision :: mos_array(mo_num), r(3) ! per-thread buffers : RIGHT MO values at the point r
+
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i, j, r, mos_array) & 
+ !$OMP SHARED (mos_r_in_r_array, n_points_final_grid, mo_num, final_grid_points)
+  do i = 1, n_points_final_grid
+    r(1) = final_grid_points(1,i)
+    r(2) = final_grid_points(2,i)
+    r(3) = final_grid_points(3,i)
+    call give_all_mos_r_at_r(r, mos_array) ! all the RIGHT MOs at the i-th grid point
+    do j = 1, mo_num
+      mos_r_in_r_array(j,i) = mos_array(j)
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+ 
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_r_in_r_array_transp, (n_points_final_grid, mo_num)]
+
+  BEGIN_DOC
+  ! Transpose of mos_r_in_r_array : mos_r_in_r_array_transp(i,j) = value of the jth RIGHT mo on the ith grid point
+  END_DOC
+
+  implicit none
+  integer :: ipoint, j
+
+  do j = 1, mo_num
+    do ipoint = 1, n_points_final_grid
+      mos_r_in_r_array_transp(ipoint,j) = mos_r_in_r_array(j,ipoint)
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+subroutine give_all_mos_r_at_r(r, mos_r_array)
+
+  BEGIN_DOC
+  ! Evaluates every RIGHT MO at the point "r" : mos_r_array = mo_r_coef_transp x (values of the AOs at "r")
+  END_DOC
+
+  implicit none
+  double precision, intent(in)  :: r(3)
+  double precision, intent(out) :: mos_r_array(mo_num)
+  double precision              :: ao_values(ao_num)
+
+  call give_all_aos_at_r(r, ao_values)
+  call dgemv( 'N', mo_num, ao_num, 1.d0, mo_r_coef_transp, mo_num &
+            , ao_values, 1, 0.d0, mos_r_array, 1 )
+end subroutine give_all_mos_r_at_r
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_l_in_r_array, (mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! mos_l_in_r_array(i,j) = value of the ith LEFT mo on the jth grid point
+  END_DOC
+
+  implicit none
+  integer          :: i, j                    ! i : grid point, j : MO
+  double precision :: mos_array(mo_num), r(3) ! per-thread buffers : LEFT MO values at the point r
+
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,mos_array,j) & 
+ !$OMP SHARED(mos_l_in_r_array,n_points_final_grid,mo_num,final_grid_points)
+  do i = 1, n_points_final_grid
+    r(1) = final_grid_points(1,i)
+    r(2) = final_grid_points(2,i)
+    r(3) = final_grid_points(3,i)
+    call give_all_mos_l_at_r(r, mos_array) ! all the LEFT MOs at the i-th grid point
+    do j = 1, mo_num
+      mos_l_in_r_array(j,i) = mos_array(j)
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+ 
+END_PROVIDER
+
+! ---
+
+subroutine give_all_mos_l_at_r(r, mos_l_array)
+
+  BEGIN_DOC
+  ! Evaluates every LEFT MO at the point "r" : mos_l_array = mo_l_coef_transp x (values of the AOs at "r")
+  END_DOC
+
+  implicit none
+  double precision, intent(in)  :: r(3)
+  double precision, intent(out) :: mos_l_array(mo_num)
+  double precision              :: ao_values(ao_num)
+
+  call give_all_aos_at_r(r, ao_values)
+  call dgemv( 'N', mo_num, ao_num, 1.d0, mo_l_coef_transp, mo_num &
+            , ao_values, 1, 0.d0, mos_l_array, 1 )
+end subroutine give_all_mos_l_at_r
+
+! ---
+
+BEGIN_PROVIDER[double precision, mos_l_in_r_array_transp,(n_points_final_grid,mo_num)]
+
+  BEGIN_DOC
+  ! Transpose of mos_l_in_r_array : mos_l_in_r_array_transp(i,j) = value of the jth LEFT mo on the ith grid point
+  END_DOC
+
+  implicit none
+  integer :: ipoint, j
+
+  do j = 1, mo_num
+    do ipoint = 1, n_points_final_grid
+      mos_l_in_r_array_transp(ipoint,j) = mos_l_in_r_array(j,ipoint)
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f b/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f
new file mode 100644
index 00000000..5478fa5c
--- /dev/null
+++ b/src/bi_ortho_mos/grad_bi_ort_mos_in_r.irp.f
@@ -0,0 +1,100 @@
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array,(mo_num,n_points_final_grid,3)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array(i,j,k) = kth component (k = 1 : x, k = 2 : y, k = 3 : z) of the gradient
+ ! of the ith RIGHT mo on the jth grid point
+ END_DOC
+ integer :: k
+ mos_r_grad_in_r_array = 0.d0
+ do k=1,3
+  call dgemm( 'N','N',mo_num,n_points_final_grid,ao_num,1.d0,mo_r_coef_transp,mo_num &
+            , aos_grad_in_r_array(1,1,k),ao_num,0.d0,mos_r_grad_in_r_array(1,1,k),mo_num )
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array_transp,(3,mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array_transp(k,j,i)   = value of the kth component of the gradient of jth RIGHT mo on the ith grid point
+ !
+ ! k = 1 : x, k = 2 : y, k = 3 : z
+ END_DOC
+ integer :: ipoint, j
+ mos_r_grad_in_r_array_transp = 0.d0
+ ! same data as mos_r_grad_in_r_array, with the Cartesian component moved to the first index
+ do ipoint = 1, n_points_final_grid
+  do j = 1, mo_num
+   mos_r_grad_in_r_array_transp(1,j,ipoint) = mos_r_grad_in_r_array(j,ipoint,1) ! x
+   mos_r_grad_in_r_array_transp(2,j,ipoint) = mos_r_grad_in_r_array(j,ipoint,2) ! y
+   mos_r_grad_in_r_array_transp(3,j,ipoint) = mos_r_grad_in_r_array(j,ipoint,3) ! z
+  enddo
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_r_grad_in_r_array_transp_bis,(3,n_points_final_grid,mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! mos_r_grad_in_r_array_transp_bis(k,i,j) = value of the kth component of the gradient on the ith grid point of the jth RIGHT MO
+ END_DOC
+ integer :: ipoint, j
+ mos_r_grad_in_r_array_transp_bis = 0.d0
+ ! same data as mos_r_grad_in_r_array, reordered as (component, grid point, MO)
+ do j = 1, mo_num
+  do ipoint = 1, n_points_final_grid
+   mos_r_grad_in_r_array_transp_bis(1,ipoint,j) = mos_r_grad_in_r_array(j,ipoint,1) ! x
+   mos_r_grad_in_r_array_transp_bis(2,ipoint,j) = mos_r_grad_in_r_array(j,ipoint,2) ! y
+   mos_r_grad_in_r_array_transp_bis(3,ipoint,j) = mos_r_grad_in_r_array(j,ipoint,3) ! z
+  enddo
+ enddo
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array,(mo_num,n_points_final_grid,3)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array(i,j,k)          = value of the kth component of the gradient of ith LEFT mo on the jth grid point
+ !
+ ! k = 1 : x, k = 2 : y, k = 3 : z
+ END_DOC
+ integer :: m
+ mos_l_grad_in_r_array = 0.d0
+ do m=1,3 ! LEFT coefficients (mo_l_coef_transp) contracted with the AO gradients, one Cartesian component at a time
+  call dgemm('N','N',mo_num,n_points_final_grid,ao_num,1.d0,mo_l_coef_transp,mo_num,aos_grad_in_r_array(1,1,m),ao_num,0.d0,mos_l_grad_in_r_array(1,1,m),mo_num)
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array_transp,(3,mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array_transp(k,j,i)   = value of the kth component of the gradient of jth LEFT mo on the ith grid point
+ !
+ ! k = 1 : x, k = 2 : y, k = 3 : z
+ END_DOC
+ integer :: ipoint, j
+ mos_l_grad_in_r_array_transp = 0.d0
+ ! same data as mos_l_grad_in_r_array, with the Cartesian component moved to the first index
+ do ipoint = 1, n_points_final_grid
+  do j = 1, mo_num
+   mos_l_grad_in_r_array_transp(1,j,ipoint) = mos_l_grad_in_r_array(j,ipoint,1) ! x
+   mos_l_grad_in_r_array_transp(2,j,ipoint) = mos_l_grad_in_r_array(j,ipoint,2) ! y
+   mos_l_grad_in_r_array_transp(3,j,ipoint) = mos_l_grad_in_r_array(j,ipoint,3) ! z
+  enddo
+ enddo
+ END_PROVIDER
+
+ BEGIN_PROVIDER[double precision, mos_l_grad_in_r_array_transp_bis,(3,n_points_final_grid,mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! mos_l_grad_in_r_array_transp_bis(k,i,j) = value of the kth component of the gradient on the ith grid point of the jth LEFT MO
+ END_DOC
+ integer :: ipoint, j
+ mos_l_grad_in_r_array_transp_bis = 0.d0
+ ! same data as mos_l_grad_in_r_array, reordered as (component, grid point, MO)
+ do j = 1, mo_num
+  do ipoint = 1, n_points_final_grid
+   mos_l_grad_in_r_array_transp_bis(1,ipoint,j) = mos_l_grad_in_r_array(j,ipoint,1) ! x
+   mos_l_grad_in_r_array_transp_bis(2,ipoint,j) = mos_l_grad_in_r_array(j,ipoint,2) ! y
+   mos_l_grad_in_r_array_transp_bis(3,ipoint,j) = mos_l_grad_in_r_array(j,ipoint,3) ! z
+  enddo
+ enddo
+ END_PROVIDER
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
new file mode 100644
index 00000000..d51999fc
--- /dev/null
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -0,0 +1,224 @@
+
+! ---
+
+subroutine ao_to_mo_bi_ortho(A_ao, LDA_ao, A_mo, LDA_mo)
+
+  BEGIN_DOC
+  !
+  ! Bi-orthonormal |AO| -> |MO| transformation of a one-body matrix:
+  !
+  ! A_mo = $C_L^\dagger.A_{ao}.C_R$ with C_L = mo_l_coef (LEFT) and C_R = mo_r_coef (RIGHT)
+  !
+  ! LDA_ao and LDA_mo are the leading dimensions of A_ao and A_mo.
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_ao(LDA_ao,ao_num)
+  double precision, intent(out) :: A_mo(LDA_mo,mo_num)
+  integer                       :: ld_r, ld_l
+  double precision, allocatable :: half(:,:)
+
+  ld_r = size(mo_r_coef, 1)
+  ld_l = size(mo_l_coef, 1)
+  allocate ( half(ao_num,mo_num) )
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: half
+
+  ! half-transformed matrix: half = A_ao x mo_r_coef
+  call dgemm('N', 'N', ao_num, mo_num, ao_num, 1.d0, A_ao, LDA_ao, mo_r_coef, ld_r, 0.d0, half, ao_num)
+
+  ! A_mo = mo_l_coef.T x half
+  call dgemm('T', 'N', mo_num, mo_num, ao_num, 1.d0, mo_l_coef, ld_l, half, ao_num, 0.d0, A_mo, LDA_mo)
+
+  deallocate(half)
+
+end subroutine ao_to_mo_bi_ortho
+
+! ---
+
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  !
+  ! Back-transformation of a matrix from the bi-orthonormal |MO| basis to the |AO| basis.
+  !
+  ! With L = mo_l_coef, R = mo_r_coef and S = ao_overlap:
+  !   L.T x A_ao x R = A_mo   and   L.T x S x R = I
+  !
+  ! ==> A_ao = (S x R) x A_mo x (S x L).T
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)
+  integer                       :: ld_s
+  double precision, allocatable :: s_c(:,:), s_r_a(:,:)
+
+  ld_s = size(ao_overlap, 1)
+  allocate( s_c(ao_num,mo_num), s_r_a(ao_num,mo_num) )
+
+  ! s_c = S x R
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0          &
+            , ao_overlap, ld_s, mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, s_c, ao_num )
+
+  ! s_r_a = (S x R) x A_mo
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0, s_c, ao_num, A_mo, LDA_mo, 0.d0, s_r_a, ao_num )
+
+  ! s_c is recycled: s_c = S x L
+  s_c = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0          &
+            , ao_overlap, ld_s, mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, s_c, ao_num )
+
+  ! A_ao = [(S x R) x A_mo] x (S x L).T
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0, s_r_a, ao_num, s_c, ao_num, 0.d0, A_ao, LDA_ao )
+
+  deallocate(s_c, s_r_a)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
+
+  BEGIN_DOC
+  ! Molecular right-orbital coefficients on |AO| basis set: mo_r_coef(j,i) is the weight of AO j in right MO i.
+  ! Read with ezfio_get_bi_ortho_mos_mo_r_coef when ezfio_has_bi_ortho_mos_mo_r_coef reports stored data,
+  ! otherwise copied from mo_coef.
+  END_DOC
+
+  implicit none
+  integer :: i, j
+  logical :: exists
+
+  PROVIDE ezfio_filename
+  ! Only the master rank asks EZFIO whether right coefficients are stored.
+  if (mpi_master) then
+    call ezfio_has_bi_ortho_mos_mo_r_coef(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_r_coef with MPI'
+    endif
+  IRP_ENDIF
+  ! In MPI builds "exists" (and later the coefficients) is broadcast from rank 0 to every rank.
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      write(*,*) 'Read mo_r_coef'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST(mo_r_coef, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_r_coef with MPI'
+      endif
+    IRP_ENDIF
+  else
+    ! Nothing stored: the right orbitals are initialised to the usual MOs (mo_coef).
+    print*, 'mo_r_coef are mo_coef'
+    do i = 1, mo_num
+      do j = 1, ao_num
+        mo_r_coef(j,i) = mo_coef(j,i)
+      enddo
+    enddo
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_l_coef, (ao_num, mo_num) ]
+
+  BEGIN_DOC
+  ! Molecular left-orbital coefficients on |AO| basis set: mo_l_coef(j,i) is the weight of AO j in left MO i.
+  ! Read with ezfio_get_bi_ortho_mos_mo_l_coef when ezfio_has_bi_ortho_mos_mo_l_coef reports stored data,
+  ! otherwise copied from mo_coef.
+  END_DOC
+
+  implicit none
+  integer :: i, j
+  logical :: exists
+
+  PROVIDE ezfio_filename
+  ! Only the master rank asks EZFIO whether left coefficients are stored.
+  if (mpi_master) then
+    call ezfio_has_bi_ortho_mos_mo_l_coef(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read mo_l_coef with MPI'
+    endif
+  IRP_ENDIF
+  ! In MPI builds "exists" (and later the coefficients) is broadcast from rank 0 to every rank.
+  if (exists) then
+    if (mpi_master) then
+      call ezfio_get_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      write(*,*) 'Read mo_l_coef'
+    endif
+    IRP_IF MPI
+      call MPI_BCAST(mo_l_coef, mo_num*ao_num, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read mo_l_coef with MPI'
+      endif
+    IRP_ENDIF
+  else
+    ! Nothing stored: the left orbitals are initialised to the usual MOs (mo_coef).
+    print*, 'mo_l_coef are mo_coef'
+    do i = 1, mo_num
+      do j = 1, ao_num
+        mo_l_coef(j,i) = mo_coef(j,i)
+      enddo
+    enddo
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_r_coef_transp, (mo_num, ao_num)]
+  BEGIN_DOC
+  ! Transpose of mo_r_coef: mo_r_coef_transp(j,m) = mo_r_coef(m,j)
+  END_DOC
+  implicit none
+  integer :: imo
+  do imo = 1, mo_num
+    ! one right MO at a time: column imo of mo_r_coef becomes row imo of the transpose
+    mo_r_coef_transp(imo,1:ao_num) = mo_r_coef(1:ao_num,imo)
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_l_coef_transp, (mo_num, ao_num)]
+  BEGIN_DOC
+  ! Transpose of mo_l_coef: mo_l_coef_transp(j,m) = mo_l_coef(m,j)
+  END_DOC
+  implicit none
+  integer :: imo
+  do imo = 1, mo_num
+    ! one left MO at a time: column imo of mo_l_coef becomes row imo of the transpose
+    mo_l_coef_transp(imo,1:ao_num) = mo_l_coef(1:ao_num,imo)
+  enddo
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f
new file mode 100644
index 00000000..d7f45c94
--- /dev/null
+++ b/src/bi_ortho_mos/overlap.irp.f
@@ -0,0 +1,160 @@
+
+
+ BEGIN_PROVIDER [ double precision, overlap_bi_ortho, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_diag_bi_ortho, (mo_num)]
+
+  BEGIN_DOC
+  ! overlap_bi_ortho(k,i) = sum_{n,m} mo_l_coef(n,k) * ao_overlap(n,m) * mo_r_coef(m,i)
+  !
+  ! Overlap matrix between the LEFT (row index) and RIGHT (column index) MOs. Should be the identity matrix
+  !
+  ! overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i)
+  !
+  ! The average |diagonal| and average |off-diagonal| elements are printed, together with the full
+  ! matrix when either deviates from its ideal value by more than 1.d-10.
+  END_DOC
+
+  implicit none
+  integer                       :: i, k
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: tmp(:,:)
+
+  allocate( tmp(mo_num,ao_num) )
+
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                         &
+            , mo_l_coef, size(mo_l_coef, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0           &
+            , tmp, size(tmp, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, overlap_bi_ortho, size(overlap_bi_ortho, 1) )
+
+  deallocate( tmp )
+
+  do i = 1, mo_num
+    overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i)
+  enddo
+
+  ! accu_d  : average of the absolute diagonal elements (ideal value 1)
+  ! accu_nd : average of the absolute off-diagonal elements (ideal value 0)
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    do k = 1, mo_num
+      if(i==k) then
+        accu_d += dabs(overlap_bi_ortho(k,i))
+      else
+        accu_nd += dabs(overlap_bi_ortho(k,i))
+      endif
+    enddo
+  enddo
+  accu_d = accu_d/dble(mo_num)
+  ! a single MO has no off-diagonal element: skip the 0/0 that used to produce a NaN
+  if(mo_num > 1) then
+    accu_nd = accu_nd/(dble(mo_num)*dble(mo_num-1))
+  endif
+
+  if(dabs(accu_d-1.d0).gt.1.d-10.or.dabs(accu_nd).gt.1.d-10)then
+    print*,'Warning !!!'
+    print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0)
+    print*,'And bi orthogonality is off by an average of ',accu_nd
+    print*,'****************'
+    print*,'Overlap matrix between mo_l_coef and mo_r_coef  '
+    do i = 1, mo_num
+      write(*,'(100(F16.10,1X))')overlap_bi_ortho(i,:)
+    enddo
+  endif
+  print*,'Average trace of overlap_bi_ortho (should be 1.)'
+  print*,'accu_d  = ',accu_d
+  print*,'Average of off diagonal terms of overlap_bi_ortho (should be zero)'
+  print*,'accu_nd = ',accu_nd
+  print*,'****************'
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, overlap_mo_r, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_mo_l, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! overlap_mo_r(j,i) = <MO_R_j|MO_R_i> = sum_{p,q} mo_r_coef(p,j) * ao_overlap(q,p) * mo_r_coef(q,i)
+  ! overlap_mo_l(j,i) = <MO_L_j|MO_L_i> = sum_{p,q} mo_l_coef(p,j) * ao_overlap(q,p) * mo_l_coef(q,i)
+  END_DOC
+
+  implicit none
+  integer :: i, j, p, q
+
+  overlap_mo_r = 0.d0
+  overlap_mo_l = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do p = 1, ao_num
+        do q = 1, ao_num
+          overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) 
+          overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, overlap_mo_r_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_mo_l_mo, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! overlap_mo_r_mo(j,i) = <MO_j|MO_R_i> = sum_{p,q} mo_coef(p,j) * ao_overlap(q,p) * mo_r_coef(q,i)
+  ! overlap_mo_l_mo(j,i) = <MO_j|MO_L_i> = sum_{p,q} mo_coef(p,j) * ao_overlap(q,p) * mo_l_coef(q,i)
+  END_DOC
+
+  implicit none
+  integer :: i, j, p, q
+
+  overlap_mo_r_mo = 0.d0
+  overlap_mo_l_mo = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do p = 1, ao_num
+        do q = 1, ao_num
+          overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p)
+          overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, angle_left_right, (mo_num)]
+&BEGIN_PROVIDER [ double precision, max_angle_left_right]
+
+  BEGIN_DOC
+  ! angle_left_right(i) = angle (in degrees) between the left-eigenvector chi_i and the right-eigenvector phi_i
+  !
+  ! cos(angle) = <chi_i|phi_i> / sqrt(<chi_i|chi_i> <phi_i|phi_i>), clamped to [-1,1] before dacos
+  !
+  ! max_angle_left_right = largest element of angle_left_right
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: left, right, arg
+
+  do i = 1, mo_num
+    left  = overlap_mo_l(i,i)
+    right = overlap_mo_r(i,i)
+    arg = min(overlap_bi_ortho(i,i)/dsqrt(left*right),1.d0)
+    arg = max(arg, -1.d0)
+    angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0)
+  enddo
+  max_angle_left_right = maxval(angle_left_right(1:mo_num))
+END_PROVIDER 
+
+! ---
+
+

From 4472a6d9be42e1606a343161602871e4b9ab9921 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 6 Feb 2023 19:00:35 +0100
Subject: [PATCH 2/7] non_h_ints compiles

---
 src/ao_one_e_ints/pot_ao_erf_ints.irp.f       | 627 +++++++++++---
 src/ao_tc_eff_map/NEED                        |   5 +
 src/ao_tc_eff_map/README.rst                  |  12 +
 src/ao_tc_eff_map/compute_ints_eff_pot.irp.f  |  76 ++
 src/ao_tc_eff_map/fit_j.irp.f                 | 510 ++++++++++++
 .../integrals_eff_pot_in_map_slave.irp.f      | 194 +++++
 src/ao_tc_eff_map/map_integrals_eff_pot.irp.f | 313 +++++++
 src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f   | 332 ++++++++
 src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f     | 303 +++++++
 src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f | 371 +++++++++
 src/ao_tc_eff_map/potential.irp.f             | 335 ++++++++
 src/ao_tc_eff_map/providers_ao_eff_pot.irp.f  |  86 ++
 src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f      | 728 ++++++++++++++++
 src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f      | 729 ++++++++++++++++
 src/ao_tc_eff_map/two_e_ints_gauss.irp.f      | 327 ++++++++
 src/ao_tc_eff_map/useful_sub.irp.f            | 364 ++++++++
 src/dft_utils_in_r/ao_in_r.irp.f              |  39 +
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      | 155 ++++
 src/non_h_ints_mu/NEED                        |   2 +
 src/non_h_ints_mu/README.rst                  |  11 +
 src/non_h_ints_mu/debug_fit.irp.f             | 512 ++++++++++++
 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f | 780 ++++++++++++++++++
 src/non_h_ints_mu/grad_squared.irp.f          | 437 ++++++++++
 src/non_h_ints_mu/grad_squared_manu.irp.f     | 221 +++++
 src/non_h_ints_mu/grad_tc_int.irp.f           | 217 +++++
 src/non_h_ints_mu/j12_nucl_utils.irp.f        | 640 ++++++++++++++
 src/non_h_ints_mu/new_grad_tc.irp.f           | 360 ++++++++
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      | 174 ++++
 src/non_h_ints_mu/numerical_integ.irp.f       | 623 ++++++++++++++
 src/non_h_ints_mu/test_non_h_ints.irp.f       | 102 +++
 src/non_h_ints_mu/total_tc_int.irp.f          |  91 ++
 src/tc_keywords/EZFIO.cfg                     | 185 +++++
 src/tc_keywords/NEED                          |   2 +
 src/tc_keywords/j1b_pen.irp.f                 | 116 +++
 src/tc_keywords/tc_keywords.irp.f             |   7 +
 src/utils/integration.irp.f                   | 414 ++++++++++
 src/utils/one_e_integration.irp.f             |  69 ++
 37 files changed, 10353 insertions(+), 116 deletions(-)
 create mode 100644 src/ao_tc_eff_map/NEED
 create mode 100644 src/ao_tc_eff_map/README.rst
 create mode 100644 src/ao_tc_eff_map/compute_ints_eff_pot.irp.f
 create mode 100644 src/ao_tc_eff_map/fit_j.irp.f
 create mode 100644 src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f
 create mode 100644 src/ao_tc_eff_map/map_integrals_eff_pot.irp.f
 create mode 100644 src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f
 create mode 100644 src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f
 create mode 100644 src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f
 create mode 100644 src/ao_tc_eff_map/potential.irp.f
 create mode 100644 src/ao_tc_eff_map/providers_ao_eff_pot.irp.f
 create mode 100644 src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f
 create mode 100644 src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f
 create mode 100644 src/ao_tc_eff_map/two_e_ints_gauss.irp.f
 create mode 100644 src/ao_tc_eff_map/useful_sub.irp.f
 create mode 100644 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
 create mode 100644 src/non_h_ints_mu/NEED
 create mode 100644 src/non_h_ints_mu/README.rst
 create mode 100644 src/non_h_ints_mu/debug_fit.irp.f
 create mode 100644 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
 create mode 100644 src/non_h_ints_mu/grad_squared.irp.f
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/grad_tc_int.irp.f
 create mode 100644 src/non_h_ints_mu/j12_nucl_utils.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f
 create mode 100644 src/non_h_ints_mu/numerical_integ.irp.f
 create mode 100644 src/non_h_ints_mu/test_non_h_ints.irp.f
 create mode 100644 src/non_h_ints_mu/total_tc_int.irp.f
 create mode 100644 src/tc_keywords/EZFIO.cfg
 create mode 100644 src/tc_keywords/NEED
 create mode 100644 src/tc_keywords/j1b_pen.irp.f
 create mode 100644 src/tc_keywords/tc_keywords.irp.f

diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
index 42505194..c4a573be 100644
--- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f
@@ -46,142 +46,327 @@ double precision function NAI_pol_mult_erf_ao(i_ao,j_ao,mu_in,C_center)
 
 end
 
+double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in)
 
-
-double precision function NAI_pol_mult_erf(A_center,B_center,power_A,power_B,alpha,beta,C_center,n_pt_in,mu_in)
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math::
-  ! 
+  !
   !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
-  !   \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$.
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(3), A_center(3), B_center(3), alpha, beta, mu_in
+
+  integer                      :: i, n_pt, n_pt_out
+  double precision             :: P_center(3)
+  double precision             :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision             :: const_factor, dist_integral
+  double precision             :: accu, p_inv, p, rho, p_inv_2
+  double precision             :: p_new
+
+  double precision             :: rint
+
+  p       = alpha + beta
+  p_inv   = 1.d0 / p
+  p_inv_2 = 0.5d0 * p_inv
+  rho     = alpha * beta * p_inv
+
+  dist          = 0.d0
+  dist_integral = 0.d0
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+    dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i))
+  enddo
+  const_factor = dist * rho
+  if(const_factor > 80.d0) then
+    NAI_pol_mult_erf = 0.d0
+    return
+  endif
+
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+
+  n_pt  =  2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) )
+  const = p * dist_integral * p_new * p_new
+  if(n_pt == 0) then
+    NAI_pol_mult_erf = coeff * rint(0, const)
+    return
+  endif
+
+  do i = 0, n_pt_in
+    d(i) = 0.d0
+  enddo
+  ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
+  p_new = p_new * p_new
+  call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+  if(n_pt_out < 0) then
+    NAI_pol_mult_erf = 0.d0
+    return
+  endif
+
+  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+  accu = 0.d0
+  do i = 0, n_pt_out, 2
+    accu += d(i) * rint(i/2, const)
+  enddo
+  NAI_pol_mult_erf = accu * coeff
+
+end function NAI_pol_mult_erf
+
+! ---
+
+
+double precision function NAI_pol_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  ! $\int_{-\infty}^{\infty} dr \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu |r - R_C|)}{|r - R_C|}$.
   !
   END_DOC
 
   implicit none
-  integer, intent(in)            :: n_pt_in
-  double precision,intent(in)    :: C_center(3),A_center(3),B_center(3),alpha,beta,mu_in
-  integer, intent(in)            :: power_A(3),power_B(3)
-  integer                        :: i,j,k,l,n_pt
-  double precision               :: P_center(3)
+  integer,          intent(in)   :: i_ao, j_ao
+  double precision, intent(in)   :: beta, B_center(3)
+  double precision, intent(in)   :: mu_in, C_center(3)
+
+  integer                        :: i, j, power_A1(3), power_A2(3), n_pt_in
+  double precision               :: A1_center(3), A2_center(3), alpha1, alpha2, coef12, coef1, integral
+
+  double precision, external     :: NAI_pol_mult_erf_with1s, NAI_pol_mult_erf_ao
+
+  ASSERT(beta .ge. 0.d0)
+  if(beta .lt. 1d-10) then
+    NAI_pol_mult_erf_ao_with1s = NAI_pol_mult_erf_ao(i_ao, j_ao, mu_in, C_center)
+    return
+  endif
+
+  power_A1(1:3) = ao_power(i_ao,1:3)
+  power_A2(1:3) = ao_power(j_ao,1:3)
+
+  A1_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3)
+  A2_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3)
+
+  n_pt_in = n_pt_max_integrals
+
+  NAI_pol_mult_erf_ao_with1s = 0.d0
+  do i = 1, ao_prim_num(i_ao)
+    alpha1 = ao_expo_ordered_transp           (i,i_ao)
+    coef1  = ao_coef_normalized_ordered_transp(i,i_ao)
+
+    do j = 1, ao_prim_num(j_ao)
+      alpha2 = ao_expo_ordered_transp(j,j_ao)
+      coef12 = coef1 * ao_coef_normalized_ordered_transp(j,j_ao)
+      if(dabs(coef12) .lt. 1d-14) cycle
+
+      integral = NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
+                                        , beta, B_center, C_center, n_pt_in, mu_in )
+
+      NAI_pol_mult_erf_ao_with1s += integral * coef12
+    enddo
+  enddo
+
+end function NAI_pol_mult_erf_ao_with1s
+
+subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points)
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dx (x - A1_x)^{a_1} (x - A2_x)^{a_2} \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^{b_1} (y - A2_y)^{b_2} \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (z - A1_z)^{c_1} (z - A2_z)^{c_2} \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}.
+  !
+  END_DOC
 
-  double precision               :: d(0:n_pt_in),pouet,coeff,dist,const,pouet_2,factor
-  double precision               :: I_n_special_exact,integrate_bourrin,I_n_bibi
-  double precision               :: V_e_n,const_factor,dist_integral,tmp
-  double precision               :: accu,rint,p_inv,p,rho,p_inv_2
-  integer                        :: n_pt_out,lmax
   include 'utils/constants.include.F'
-  p = alpha + beta
-  p_inv = 1.d0/p
-  p_inv_2 = 0.5d0 * p_inv
-  rho = alpha * beta * p_inv
 
-  dist = 0.d0
-  dist_integral = 0.d0
-  do i = 1, 3
-    P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
-    dist += (A_center(i) - B_center(i))*(A_center(i) - B_center(i))
-    dist_integral += (P_center(i) - C_center(i))*(P_center(i) - C_center(i))
-  enddo
-  const_factor = dist*rho
-  if(const_factor > 80.d0)then
-    NAI_pol_mult_erf = 0.d0
-    return
-  endif
-  double precision               :: p_new
-  p_new = mu_in/dsqrt(p+ mu_in * mu_in)
-  factor = dexp(-const_factor)
-  coeff = dtwo_pi * factor * p_inv * p_new
-  lmax = 20
+  implicit none
+  integer,          intent(in)  :: n_pt_in, LD_B, LD_C, LD_resv, n_points
+  integer,          intent(in)  :: power_A1(3), power_A2(3)
+  double precision, intent(in)  :: A1_center(3), A2_center(3)
+  double precision, intent(in)  :: C_center(LD_C,3), B_center(LD_B,3)
+  double precision, intent(in)  :: alpha1, alpha2, beta, mu_in
+  double precision, intent(out) :: res_v(LD_resv)
 
-  !  print*, "b"
-  do i = 0, n_pt_in
-    d(i) = 0.d0
-  enddo
-  n_pt =  2 * ( (power_A(1) + power_B(1)) +(power_A(2) + power_B(2)) +(power_A(3) + power_B(3)) )
-  const = p * dist_integral * p_new * p_new
-  if (n_pt == 0) then
-    pouet = rint(0,const)
-    NAI_pol_mult_erf = coeff * pouet
+  integer                       :: i, n_pt, n_pt_out, ipoint
+  double precision              :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision              :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision              :: dist_integral
+  double precision              :: d(0:n_pt_in), coeff, const, factor
+  double precision              :: accu
+  double precision              :: p_new, p_new2, coef_tmp, cons_tmp
+
+  double precision              :: rint
+
+
+  res_V(1:LD_resv) = 0.d0
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))&
+                + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))&
+                + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+
+  const_factor12 = dist12 * rho12
+  if(const_factor12 > 80.d0) then
     return
   endif
 
-  ! call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
-  p_new = p_new * p_new
-  call give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in,p,p_inv,p_inv_2,p_new,P_center)
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p        = alpha12 + beta
+  p_inv    = 1.d0 / p
+  p_inv_2  = 0.5d0 * p_inv
+  rho      = alpha12 * beta * p_inv
+  p_new    = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2   = p_new * p_new
+  coef_tmp = dtwo_pi * p_inv * p_new
+  cons_tmp = p * p_new2
+  n_pt     =  2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) + power_A1(3) + power_A2(3) )
 
+  if(n_pt == 0) then
+
+    do ipoint = 1, n_points
+
+      dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+           + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+           + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3))
+      const_factor = const_factor12 + dist * rho
+      if(const_factor > 80.d0) cycle
+      coeff = coef_tmp * dexp(-const_factor)
+
+      P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+      P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+      P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+      dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+                    + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+                    + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+      const = cons_tmp * dist_integral
+
+      res_v(ipoint) = coeff * rint(0, const)
+    enddo
+
+  else
+
+    do ipoint = 1, n_points
+  
+      dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))&
+           + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))&
+           + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) 
+      const_factor = const_factor12 + dist * rho
+      if(const_factor > 80.d0) cycle
+      coeff = coef_tmp * dexp(-const_factor)
+  
+      P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv
+      P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv
+      P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv
+      dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))&
+                    + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))&
+                    + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3))
+      const = cons_tmp * dist_integral
+  
+      do i = 0, n_pt_in
+        d(i) = 0.d0
+      enddo
+      !TODO: VECTORIZE HERE
+      call give_polynomial_mult_center_one_e_erf_opt(A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center)
+  
+      if(n_pt_out < 0) then
+        cycle
+      endif
+  
+      ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+      accu = 0.d0
+      do i = 0, n_pt_out, 2
+        accu += d(i) * rint(i/2, const)
+      enddo
+  
+      res_v(ipoint) = accu * coeff
+    enddo
 
-  if(n_pt_out<0)then
-    NAI_pol_mult_erf = 0.d0
-    return
   endif
-  accu = 0.d0
 
-  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
-  do i =0 ,n_pt_out,2
-    accu +=  d(i) * rint(i/2,const)
-  enddo
-  NAI_pol_mult_erf = accu * coeff
+end subroutine NAI_pol_mult_erf_with1s_v
 
-end
+! ---
 
+subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
 
-subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,beta,&
-      power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in,p,p_inv,p_inv_2,p_new,P_center)
   BEGIN_DOC
   ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
   END_DOC
+
   implicit none
-  integer, intent(in)            :: n_pt_in
-  integer,intent(out)            :: n_pt_out
-  double precision, intent(in)   :: A_center(3), B_center(3),C_center(3),p,p_inv,p_inv_2,p_new,P_center(3)
-  double precision, intent(in)   :: alpha,beta,mu_in
-  integer, intent(in)            :: power_A(3), power_B(3)
-  integer                        :: a_x,b_x,a_y,b_y,a_z,b_z
-  double precision               :: d(0:n_pt_in)
-  double precision               :: d1(0:n_pt_in)
-  double precision               :: d2(0:n_pt_in)
-  double precision               :: d3(0:n_pt_in)
-  double precision               :: accu
+  integer,          intent(in)  :: n_pt_in
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: A_center(3), B_center(3), C_center(3), p_inv_2, p_new, P_center(3)
+  integer,          intent(out) :: n_pt_out
+  double precision, intent(out) :: d(0:n_pt_in)
+
+  integer                       :: a_x, b_x, a_y, b_y, a_z, b_z
+  integer                       :: n_pt1, n_pt2, n_pt3, dim, i
+  integer                       :: n_pt_tmp
+  double precision              :: d1(0:n_pt_in)
+  double precision              :: d2(0:n_pt_in)
+  double precision              :: d3(0:n_pt_in)
+  double precision              :: accu
+  double precision              :: R1x(0:2), B01(0:2), R1xp(0:2), R2x(0:2)
+
   accu = 0.d0
   ASSERT (n_pt_in > 1)
 
-  double precision               :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2)
-  R1x(0)  = (P_center(1) - A_center(1))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(1) - C_center(1))* p_new
+  R1x(0) = (P_center(1) - A_center(1))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(1) - C_center(1))* p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(1) - B_center(1))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(1) - C_center(1))* p_new
+  R1xp(0) = (P_center(1) - B_center(1))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(1) - C_center(1))* p_new
   !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R2x(0)  =  p_inv_2
-  R2x(1)  = 0.d0
-  R2x(2)  = -p_inv_2* p_new
+  R2x(0) =  p_inv_2
+  R2x(1) = 0.d0
+  R2x(2) = -p_inv_2 * p_new
   !R2x  = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2
-  do i = 0,n_pt_in
-    d(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
+
+  do i = 0, n_pt_in
+    d (i) = 0.d0
     d1(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
     d2(i) = 0.d0
-  enddo
-  do i = 0,n_pt_in
     d3(i) = 0.d0
   enddo
-  integer                        :: n_pt1,n_pt2,n_pt3,dim,i
+
   n_pt1 = n_pt_in
   n_pt2 = n_pt_in
   n_pt3 = n_pt_in
   a_x = power_A(1)
   b_x = power_B(1)
-  call I_x1_pol_mult_one_e(a_x,b_x,R1x,R1xp,R2x,d1,n_pt1,n_pt_in)
+  call I_x1_pol_mult_one_e(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in)
   if(n_pt1<0)then
     n_pt_out = -1
     do i = 0,n_pt_in
@@ -190,17 +375,17 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,bet
     return
   endif
 
-  R1x(0)  = (P_center(2) - A_center(2))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(2) - C_center(2))* p_new
+  R1x(0) = (P_center(2) - A_center(2))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(2) - C_center(2))* p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(2) - B_center(2))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(2) - C_center(2))* p_new
+  R1xp(0) = (P_center(2) - B_center(2))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(2) - C_center(2))* p_new
   !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
   a_y = power_A(2)
   b_y = power_B(2)
-  call I_x1_pol_mult_one_e(a_y,b_y,R1x,R1xp,R2x,d2,n_pt2,n_pt_in)
+  call I_x1_pol_mult_one_e(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in)
   if(n_pt2<0)then
     n_pt_out = -1
     do i = 0,n_pt_in
@@ -209,51 +394,151 @@ subroutine give_polynomial_mult_center_one_e_erf_opt(A_center,B_center,alpha,bet
     return
   endif
 
-
-  R1x(0)  = (P_center(3) - A_center(3))
-  R1x(1)  = 0.d0
-  R1x(2)  = -(P_center(3) - C_center(3))* p_new
+  R1x(0) = (P_center(3) - A_center(3))
+  R1x(1) = 0.d0
+  R1x(2) = -(P_center(3) - C_center(3)) * p_new
   ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2
-  R1xp(0)  = (P_center(3) - B_center(3))
-  R1xp(1)  = 0.d0
-  R1xp(2)  =-(P_center(3) - C_center(3))* p_new
+  R1xp(0) = (P_center(3) - B_center(3))
+  R1xp(1) = 0.d0
+  R1xp(2) =-(P_center(3) - C_center(3)) * p_new
   !R2x  = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2
   a_z = power_A(3)
   b_z = power_B(3)
 
-  call I_x1_pol_mult_one_e(a_z,b_z,R1x,R1xp,R2x,d3,n_pt3,n_pt_in)
-  if(n_pt3<0)then
+  call I_x1_pol_mult_one_e(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in)
+  if(n_pt3 < 0) then
     n_pt_out = -1
     do i = 0,n_pt_in
       d(i) = 0.d0
     enddo
     return
   endif
-  integer                        :: n_pt_tmp
+
   n_pt_tmp = 0
-  call multiply_poly(d1,n_pt1,d2,n_pt2,d,n_pt_tmp)
-  do i = 0,n_pt_tmp
+  call multiply_poly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp)
+  do i = 0, n_pt_tmp
     d1(i) = 0.d0
   enddo
   n_pt_out = 0
-  call multiply_poly(d ,n_pt_tmp ,d3,n_pt3,d1,n_pt_out)
+  call multiply_poly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out)
   do i = 0, n_pt_out
     d(i) = d1(i)
   enddo
 
-end
+end subroutine give_polynomial_mult_center_one_e_erf_opt
 
+! ---
+subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points)
 
-
-
-subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
-      power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
   BEGIN_DOC
-  ! Returns the explicit polynomial in terms of the $t$ variable of the 
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dr (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \frac{\erf(\mu |r - R_C |)}{| r - R_C |}.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: n_pt_in, n_points, LD_C, LD_resv
+  integer,          intent(in)  :: power_A(3), power_B(3)
+  double precision, intent(in)  :: A_center(3), B_center(3), alpha, beta, mu_in
+  double precision, intent(in)  :: C_center(LD_C,3)
+  double precision, intent(out) :: res_v(LD_resv)
+
+  integer                       :: i, n_pt, n_pt_out, ipoint
+  double precision              :: P_center(3)
+  double precision              :: d(0:n_pt_in), coeff, dist, const, factor
+  double precision              :: const_factor, dist_integral
+  double precision              :: accu, p_inv, p, rho, p_inv_2
+  double precision              :: p_new, p_new2, coef_tmp
+
+  double precision              :: rint
+
+  res_V(1:LD_resv) = 0.d0
+
+  p        = alpha + beta
+  p_inv    = 1.d0 / p
+  p_inv_2  = 0.5d0 * p_inv
+  rho      = alpha * beta * p_inv
+  p_new    = mu_in / dsqrt(p + mu_in * mu_in)
+  p_new2   = p_new * p_new
+  coef_tmp = p * p_new2
+
+  dist = 0.d0
+  do i = 1, 3
+    P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist       += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+  enddo
+
+  const_factor = dist * rho
+  if(const_factor > 80.d0) then
+    return
+  endif
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+
+  n_pt =  2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) )
+
+  if(n_pt == 0) then
+
+    do ipoint = 1, n_points
+      dist_integral = 0.d0
+      do i = 1, 3
+        dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+      enddo
+      const = coef_tmp * dist_integral
+
+      res_v(ipoint) = coeff * rint(0, const)
+    enddo
+
+  else
+
+    do ipoint = 1, n_points
+      dist_integral = 0.d0
+      do i = 1, 3
+        dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i))
+      enddo
+      const = coef_tmp * dist_integral
+
+      do i = 0, n_pt_in
+        d(i) = 0.d0
+      enddo
+      call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center)                                                                                                                                                                                                                                                                                                                                                                                                                                                                  
+
+      if(n_pt_out < 0) then
+        res_v(ipoint) = 0.d0
+        cycle
+      endif
+
+      ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+      accu = 0.d0
+      do i = 0, n_pt_out, 2
+        accu += d(i) * rint(i/2, const)
+      enddo
+
+      res_v(ipoint) = accu * coeff
+    enddo
+
+  endif
+
+end subroutine NAI_pol_mult_erf_v
+
+
+subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in)
+
+  BEGIN_DOC
+  ! Returns the explicit polynomial in terms of the $t$ variable of the
   ! following polynomial:
   !
   ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
   END_DOC
+
   implicit none
   integer, intent(in)            :: n_pt_in
   integer,intent(out)            :: n_pt_out
@@ -374,3 +659,113 @@ subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,&
 
 end
 
+double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2 &
+                                                 , beta, B_center, C_center, n_pt_in, mu_in )
+
+  BEGIN_DOC
+  !
+  ! Computes the following integral :
+  !
+  ! .. math::
+  !
+  !   \int dx (x - A1_x)^{a_1} (x - A2_x)^{a_2} \exp(-\alpha_1 (x - A1_x)^2 - \alpha_2 (x - A2_x)^2)
+  !   \int dy (y - A1_y)^{b_1} (y - A2_y)^{b_2} \exp(-\alpha_1 (y - A1_y)^2 - \alpha_2 (y - A2_y)^2)
+  !   \int dz (z - A1_z)^{c_1} (z - A2_z)^{c_2} \exp(-\alpha_1 (z - A1_z)^2 - \alpha_2 (z - A2_z)^2)
+  !   \exp(-\beta (r - B)^2)
+  !   \frac{\erf(\mu |r - R_C|)}{|r - R_C|}.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+  integer,          intent(in) :: n_pt_in
+  integer,          intent(in) :: power_A1(3), power_A2(3)
+  double precision, intent(in) :: C_center(3), A1_center(3), A2_center(3), B_center(3)
+  double precision, intent(in) :: alpha1, alpha2, beta, mu_in
+
+  integer                      :: i, n_pt, n_pt_out
+  double precision             :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12
+  double precision             :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor
+  double precision             :: dist_integral
+  double precision             :: d(0:n_pt_in), coeff, const, factor
+  double precision             :: accu
+  double precision             :: p_new
+
+  double precision             :: rint
+
+
+  ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2}
+  alpha12       = alpha1 + alpha2
+  alpha12_inv   = 1.d0 / alpha12
+  alpha12_inv_2 = 0.5d0 * alpha12_inv
+  rho12         = alpha1 * alpha2 * alpha12_inv
+  A12_center(1) = (alpha1 * A1_center(1) + alpha2 * A2_center(1)) * alpha12_inv
+  A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv
+  A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv
+  dist12        = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1)) &
+                + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2)) &
+                + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3))
+  ! screening on the A1*A2 prefactor exponent: above 80 the integral is returned as zero
+  const_factor12 = dist12 * rho12
+  if(const_factor12 > 80.d0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  ! ---
+
+  ! e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2}
+  p           = alpha12 + beta
+  p_inv       = 1.d0 / p
+  p_inv_2     = 0.5d0 * p_inv
+  rho         = alpha12 * beta * p_inv
+  P_center(1) = (alpha12 * A12_center(1) + beta * B_center(1)) * p_inv
+  P_center(2) = (alpha12 * A12_center(2) + beta * B_center(2)) * p_inv
+  P_center(3) = (alpha12 * A12_center(3) + beta * B_center(3)) * p_inv
+  dist        = (A12_center(1) - B_center(1)) * (A12_center(1) - B_center(1)) &
+              + (A12_center(2) - B_center(2)) * (A12_center(2) - B_center(2)) &
+              + (A12_center(3) - B_center(3)) * (A12_center(3) - B_center(3))
+  ! total prefactor exponent: const_factor12 plus the A12*B contribution; same cutoff of 80
+  const_factor = const_factor12 + dist * rho
+  if(const_factor > 80.d0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  dist_integral = (P_center(1) - C_center(1)) * (P_center(1) - C_center(1)) &
+                + (P_center(2) - C_center(2)) * (P_center(2) - C_center(2)) &
+                + (P_center(3) - C_center(3)) * (P_center(3) - C_center(3))
+
+  ! ---
+
+  p_new  = mu_in / dsqrt(p + mu_in * mu_in)
+  factor = dexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv * p_new
+  ! n_pt is twice the sum of all polynomial powers; n_pt == 0 means there is no polynomial prefactor
+  n_pt  =  2 * ( (power_A1(1) + power_A2(1)) + (power_A1(2) + power_A2(2)) + (power_A1(3) + power_A2(3)) )
+  const = p * dist_integral * p_new * p_new
+  if(n_pt == 0) then
+    NAI_pol_mult_erf_with1s = coeff * rint(0, const)
+    return
+  endif
+  ! general case: reset the polynomial buffer; p_new is then squared before being handed to the _opt routine
+  do i = 0, n_pt_in
+    d(i) = 0.d0
+  enddo
+  p_new = p_new * p_new
+  call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center)
+
+  if(n_pt_out < 0) then
+    NAI_pol_mult_erf_with1s = 0.d0
+    return
+  endif
+
+  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
+  accu = 0.d0
+  do i = 0, n_pt_out, 2
+    accu += d(i) * rint(i/2, const)
+  enddo
+  NAI_pol_mult_erf_with1s = accu * coeff
+
+end function NAI_pol_mult_erf_with1s
diff --git a/src/ao_tc_eff_map/NEED b/src/ao_tc_eff_map/NEED
new file mode 100644
index 00000000..d9edb325
--- /dev/null
+++ b/src/ao_tc_eff_map/NEED
@@ -0,0 +1,5 @@
+ao_two_e_erf_ints
+mo_one_e_ints
+ao_many_one_e_ints
+dft_utils_in_r
+tc_keywords
diff --git a/src/ao_tc_eff_map/README.rst b/src/ao_tc_eff_map/README.rst
new file mode 100644
index 00000000..d45df18f
--- /dev/null
+++ b/src/ao_tc_eff_map/README.rst
@@ -0,0 +1,12 @@
+ao_tc_eff_map
+=============
+
+This is a module to obtain the integrals on the AO basis of the SCALAR HERMITIAN 
+effective potential defined in Eq. 32 of JCP 154, 084119 (2021).
+It also contains the modification by a one-body Jastrow factor.  
+
+The main routine/providers are
+
++) ao_tc_sym_two_e_pot_map : map of the SCALAR PART of the total effective two-electron potential on the AO basis in PHYSICIST notation. It may also contain the two-electron term coming from the one-electron correlation factor.
++) get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) : routine to get the integrals from ao_tc_sym_two_e_pot_map. 
++) ao_tc_sym_two_e_pot(i,j,k,l) : FUNCTION that returns the scalar part of TC-potential EXCLUDING the erf(mu r12)/r12. See two_e_ints_gauss.irp.f for more details. 
diff --git a/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f
new file mode 100644
index 00000000..7a567979
--- /dev/null
+++ b/src/ao_tc_eff_map/compute_ints_eff_pot.irp.f
@@ -0,0 +1,76 @@
+
+
+subroutine compute_ao_tc_sym_two_e_pot_jl(j, l, n_integrals, buffer_i, buffer_value)
+
+  use map_module
+
+  BEGIN_DOC
+  !  Parallel client for AO integrals: for a fixed AO pair (j,l), fills buffer_i (keys) and buffer_value with the n_integrals values kept for the pairs (i,k)
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: j, l
+  integer,intent(out)             :: n_integrals
+  integer(key_kind),intent(out)   :: buffer_i(ao_num*ao_num)
+  real(integral_kind),intent(out) :: buffer_value(ao_num*ao_num)
+
+  integer                         :: i, k
+  integer                         :: kk, m, j1, i1
+  double precision                :: cpu_1, cpu_2, wall_1, wall_2
+  double precision                :: integral, wall_0, integral_pot, integral_erf
+  double precision                :: thr
+
+  logical, external               :: ao_two_e_integral_zero
+  double precision                :: ao_tc_sym_two_e_pot, ao_two_e_integral_erf
+  double precision                :: j1b_gauss_2e_j1, j1b_gauss_2e_j2
+
+
+  PROVIDE j1b_type
+
+  thr = ao_integrals_threshold
+
+  n_integrals = 0
+  ! j1 = j + l*(l-1)/2 : compound index of the pair (j,l); pairs (i,k) with a larger compound index i1 are not visited
+  j1 = j+ishft(l*l-l,-1)
+  do k = 1, ao_num           ! r1
+    i1 = ishft(k*k-k,-1)
+    if (i1 > j1) then
+      exit
+    endif
+    do i = 1, k
+      i1 += 1
+      if (i1 > j1) then
+        exit
+      endif
+      ! screening: skip when the product of the ao_two_e_integral_erf_schwartz values of the two pairs is below thr
+      if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < thr) then
+        cycle
+      endif
+
+      !DIR$ FORCEINLINE
+      integral_pot = ao_tc_sym_two_e_pot  (i, k, j, l)  ! i,k : r1    j,l : r2
+      integral_erf = ao_two_e_integral_erf(i, k, j, l)
+      integral     = integral_erf + integral_pot
+      ! extra j1b two-electron term, added only when j1b_type is 1 or 2
+      if( j1b_type .eq. 1 ) then
+        !print *, ' j1b type 1 is added'
+        integral = integral + j1b_gauss_2e_j1(i, k, j, l)
+      elseif( j1b_type .eq. 2 ) then
+        !print *, ' j1b type 2 is added'
+        integral = integral + j1b_gauss_2e_j2(i, k, j, l)
+      endif
+      ! integrals whose magnitude is below the threshold are not stored
+      if(abs(integral) < thr) then
+        cycle
+      endif
+
+      n_integrals += 1
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(i, j, k, l, buffer_i(n_integrals))
+      buffer_value(n_integrals) = integral
+    enddo
+  enddo
+
+end subroutine compute_ao_tc_sym_two_e_pot_jl
+
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
new file mode 100644
index 00000000..4730d003
--- /dev/null
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -0,0 +1,510 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
+! ---
+
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent and coefficient of a single Gaussian intended as an upper bound of J(r12,mu)
+ ! NOTE(review): the exponent scales with mu_erf here, while the other fits of this file scale exponents with mu_erf**2 -- confirm
+ ! Can be used to screen integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
+ implicit none
+ BEGIN_DOC
+ ! F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) is fitted with a gaussian and a Slater
+ !
+ !      \approx - 1/sqrt(pi) * exp(-alpha * x ) exp(-beta * x**2)
+ !
+ ! where alpha = expo_j_xmu(1) and beta = expo_j_xmu(2)
+ END_DOC
+ expo_j_xmu(1) = 1.7477d0
+ expo_j_xmu(2) = 0.668662d0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12) = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2) is expressed as 
+  !
+  ! J(mu,r12) = 0.5/mu * F(r12*mu) where F(x) =  x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+  !
+  ! F(x) is fitted by - 1/sqrt(pi) * exp(-alpha * x) exp(-beta * x^2) (see expo_j_xmu) 
+  ! 
+  ! For ng_fit_jast = 20, the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians; the other supported ng_fit_jast use hard-coded fits
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_x = (/ -0.47947881d0 /)
+    expo_gauss_j_mu_x = (/ 3.4987848d0   /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_x = (/ -0.18390742d0, -0.35512656d0 /)
+    expo_gauss_j_mu_x = (/ 31.9279947d0 ,  2.11428789d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_x = (/ -0.07501725d0, -0.28499012d0, -0.1953932d0  /)
+    expo_gauss_j_mu_x = (/ 206.74058566d0, 1.72974157d0, 11.18735164d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_x = (/ -0.01832955d0 , -0.10188952d0 , -0.20710858d0 , -0.18975032d0 , -0.04641657d0  /)
+    expo_gauss_j_mu_x = (/ 4.33116687d+03, 2.61292842d+01, 1.43447161d+00, 4.92767426d+00, 2.10654699d+02 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_x = (/ -0.08783664d0 , -0.16088711d0 , -0.18464486d0 , -0.0368509d0  , -0.08130028d0 , -0.0126972d0   /)
+    expo_gauss_j_mu_x = (/ 4.09729729d+01, 7.11620618d+00, 2.03692338d+00, 4.10831731d+02, 1.12480198d+00, 1.00000000d+04 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0  , -0.06548959d0  , -0.03539446d0  , -0.17150646d0  , -0.15071096d0  , -0.11326834d0   /)
+    expo_gauss_j_mu_x = (/ 9.88572565d+02,  1.21363371d+04,  3.69794870d+01,  1.67364529d+02,  3.03962934d+00,  1.27854005d+00,  9.76383343d+00 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0  , -0.02394051d0 , -0.00913353d0 , -0.01285182d0  /)
+    expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x = (/ /)
+  !  expo_gauss_j_mu_x = (/ /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    alpha = expo_j_xmu(1) * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = expo_j_xmu(2) * mu_erf * mu_erf
+
+    tmp = -1.0d0 / sqrt(dacos(-1.d0))
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = expos(i) + beta
+      coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.5d0 / mu_erf
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_x(i) = tmp * coef_gauss_j_mu_x(i) 
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12)^2 = 0.25/mu^2 F(r12*mu)^2
+  !
+  ! F(x)^2 = 1/pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) 
+  ! 
+  ! For ng_fit_jast = 20, the slater function exp(-2 * alpha * x) is fitted with n_max_fit_slat gaussians; the other supported ng_fit_jast use hard-coded fits
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+  double precision :: alpha_opt, beta_opt
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.26699573d0  /)
+    expo_gauss_j_mu_x_2 = (/ 11.71029824d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.11627934d0  , 0.18708824d0 /)
+    expo_gauss_j_mu_x_2 = (/ 102.41386863d0, 6.36239771d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.04947216d0  , 0.14116238d0, 0.12276501d0  /)
+    expo_gauss_j_mu_x_2 = (/ 635.29701766d0, 4.87696954d0, 33.36745891d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.01461527d0  , 0.03257147d0  , 0.08831354d0  , 0.11411794d0  , 0.06858783d0   /)
+    expo_gauss_j_mu_x_2 = (/ 8.76554470d+03, 4.90224577d+02, 3.68267125d+00, 1.29663940d+01, 6.58240931d+01 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.01347632d0  , 0.03929124d0  , 0.06289468d0  , 0.10702493d0  , 0.06999865d0  , 0.02558191d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.00000000d+04, 1.20900717d+02, 3.20346191d+00, 8.92157196d+00, 3.28119120d+01, 6.49045808d+02 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.05202849d0  , 0.01031081d0  , 0.04699157d0  , 0.01451002d0  , 0.07442576d0  , 0.02692033d0  , 0.09311842d0   /)
+    expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.00942115d0  , 0.07332421d0  , 0.0508308d0   , 0.08204949d0  , 0.0404099d0   , 0.03201288d0  , 0.01911313d0  , 0.01114732d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x_2 = (/  /)
+  !  expo_gauss_j_mu_x_2 = (/  /)
+  !  
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    !alpha_opt = 2.d0 * expo_j_xmu(1)
+    !beta_opt  = 2.d0 * expo_j_xmu(2)
+   
+    ! direct opt
+    alpha_opt = 3.52751759d0
+    beta_opt  = 1.26214809d0
+  
+    alpha = alpha_opt * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = beta_opt * mu_erf * mu_erf
+    
+    tmp = 1.d0 / dacos(-1.d0)
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = expos(i) + beta
+      coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.25d0 / (mu_erf * mu_erf)
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_x_2(i) = tmp * coef_gauss_j_mu_x_2(i)
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (ng_fit_jast)]
+
+  BEGIN_DOC
+  !
+  ! J(mu,r12) x \frac{1 - erf(mu * r12)}{2} = 
+  !
+  ! - \frac{1}{4 \sqrt{\pi} \mu} \exp(-(alpha1 + alpha2) * mu * r12 - (beta1 + beta2) * mu^2 * r12^2)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: tmp
+  double precision :: expos(ng_fit_jast), alpha, beta
+  double precision :: alpha_opt, beta_opt
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.47742461d0 /)
+    expo_gauss_j_mu_1_erf = (/ 8.72255696d0  /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.19342649d0, -0.34563835d0 /)
+    expo_gauss_j_mu_1_erf = (/ 78.66099999d0,  5.04324363d0 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.0802541d0  , -0.27019258d0, -0.20546681d0 /)
+    expo_gauss_j_mu_1_erf = (/ 504.53350764d0,  4.01408169d0, 26.5758329d0  /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.02330531d0 , -0.11888176d0 , -0.16476192d0 , -0.19874713d0 , -0.05889174d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 4.66067922d+01, 3.04359857d+00, 9.54726649d+00, 3.59796835d+02 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.01865654d0 , -0.18319251d0 , -0.06543196d0 , -0.11522778d0 , -0.14825793d0 , -0.03327101d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 8.05593848d+00, 1.27986190d+02, 2.92674319d+01, 2.93583623d+00, 7.65609148d+02 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0  , -0.07419098d0  , -0.022202d0    , -0.12242283d0  , -0.04177571d0  , -0.16983107d0  /)
+    expo_gauss_j_mu_1_erf = (/ 2.74057056d+00,  1.37626591d+04,  6.65578663d+01,  1.34693031d+03,  1.90547699d+01,  2.69445390d+02,  6.31845879d+00/)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0  , -0.02923298d0 , -0.01381381d0 , -0.01675923d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_1_erf = (/  /)
+  !  expo_gauss_j_mu_1_erf = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+
+    ASSERT(n_max_fit_slat == 20)
+
+    !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1)
+    !beta_opt  = expo_j_xmu(2) + expo_gauss_1_erf_x(2)
+   
+    ! direct opt
+    alpha_opt = 2.87875632d0
+    beta_opt  = 1.34801003d0
+    
+    alpha = alpha_opt * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = beta_opt * mu_erf * mu_erf
+    
+    tmp = -1.d0 / dsqrt(dacos(-1.d0))
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = expos(i) + beta
+      coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) 
+    enddo
+
+  else
+
+    print *, ' not implemented yet'
+    stop
+  
+  endif
+
+  tmp = 0.25d0 / mu_erf
+  do i = 1, ng_fit_jast
+    coef_gauss_j_mu_1_erf(i) = tmp * coef_gauss_j_mu_1_erf(i)
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+double precision  function F_x_j(x)
+ implicit none
+ BEGIN_DOC 
+ ! F_x_j(x) = dimension-less correlation factor = x (1 - erf(x)) - 1/sqrt(pi) exp(-x^2)
+ END_DOC
+ double precision, intent(in) :: x
+ F_x_j = x * (1.d0 - derf(x)) - 1/dsqrt(dacos(-1.d0)) * dexp(-x**2)
+
+end
+
+double precision function j_mu_F_x_j(x)
+ implicit none
+ BEGIN_DOC 
+ ! j_mu_F_x_j(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ !
+ !         = 1/(2*mu) * F_x_j(mu*x)
+ END_DOC
+ double precision :: F_x_j
+ double precision, intent(in) :: x
+ j_mu_F_x_j = 0.5d0/mu_erf * F_x_j(x*mu_erf)
+end
+
+double precision function j_mu(x)
+ implicit none
+ double precision, intent(in) :: x
+ BEGIN_DOC 
+ ! j_mu(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ END_DOC
+ j_mu = 0.5d0* x * (1.d0 - derf(mu_erf*x)) - 0.5d0/( dsqrt(dacos(-1.d0))*mu_erf) * dexp(-(mu_erf*x)*(mu_erf*x))
+ 
+end
+
+double precision function j_mu_fit_gauss(x)
+ implicit none
+ BEGIN_DOC
+ ! j_mu_fit_gauss(x) = correlation factor = 1/2 r12 * (1 - erf(mu*r12)) - 1/(2 sqrt(pi)*mu) exp(-(mu*r12)^2)
+ !
+ ! evaluated at r12 = x from its gaussian fit: sum_i coef_gauss_j_mu_x(i) * exp(-expo_gauss_j_mu_x(i) * x^2)
+ END_DOC
+ double precision, intent(in) :: x
+ integer :: i
+ double precision :: alpha,coef
+ j_mu_fit_gauss = 0.d0
+ ! the fit arrays are dimensioned (ng_fit_jast): loop over that size, not n_max_fit_slat, to stay within bounds
+ do i = 1, ng_fit_jast
+  alpha = expo_gauss_j_mu_x(i)
+  coef  = coef_gauss_j_mu_x(i)
+  j_mu_fit_gauss +=  coef * dexp(-alpha*x*x)
+ enddo
+end
+
+! ---
+
diff --git a/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f b/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f
new file mode 100644
index 00000000..28401cc4
--- /dev/null
+++ b/src/ao_tc_eff_map/integrals_eff_pot_in_map_slave.irp.f
@@ -0,0 +1,194 @@
+subroutine ao_tc_sym_two_e_pot_in_map_slave_tcp(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Computes buffers of integrals: calls ao_tc_sym_two_e_pot_in_map_slave with thread=0 and iproc=i (ID of the current thread).
+  END_DOC
+  call ao_tc_sym_two_e_pot_in_map_slave(0,i)
+end
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Computes buffers of integrals: calls ao_tc_sym_two_e_pot_in_map_slave with thread=1 and iproc=i (ID of the current thread).
+  END_DOC
+  call ao_tc_sym_two_e_pot_in_map_slave(1,i)
+end
+
+
+
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_slave(thread,iproc)
+  use map_module
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Worker loop: gets (j,l) tasks from the task server, computes the buffer of integrals and pushes it
+  END_DOC
+  ! thread is forwarded to connect_to_taskserver and to the push-socket helpers; iproc is not used here.
+  integer, intent(in)            :: thread, iproc
+
+  integer                        :: j,l,n_integrals
+  integer                        :: rc
+  real(integral_kind), allocatable :: buffer_value(:)
+  integer(key_kind), allocatable :: buffer_i(:)
+
+  integer                        :: worker_id, task_id
+  character*(512)                :: task
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  character*(64)                 :: state
+  ! Open the socket to the task server; if the connection fails, close it and return without working.
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+  ! Socket through which the computed buffers are pushed.
+  zmq_socket_push      = new_zmq_push_socket(thread)
+  ! Each buffer can hold ao_num*ao_num keys / values.
+  allocate ( buffer_i(ao_num*ao_num), buffer_value(ao_num*ao_num) )
+  ! Loop until no task can be obtained (request returns -1) or task_id == 0.
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task) == -1) then
+      exit
+    endif
+    if (task_id == 0) exit
+    read(task,*) j, l
+    integer, external :: task_done_to_taskserver
+    call compute_ao_tc_sym_two_e_pot_jl(j,l,n_integrals,buffer_i,buffer_value)
+    if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id) == -1) then
+        stop 'Unable to send task_done'
+    endif
+    call push_integrals(zmq_socket_push, n_integrals, buffer_i, buffer_value, task_id)
+  enddo
+  ! A failed disconnect is deliberately ignored.
+  integer, external :: disconnect_from_taskserver
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
+    continue
+  endif
+  deallocate( buffer_i, buffer_value )
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+
+end
+
+
+subroutine ao_tc_sym_two_e_pot_in_map_collector(zmq_socket_pull)
+  use map_module
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Collects results from the AO integral calculation and inserts them into ao_tc_sym_two_e_pot_map
+  END_DOC
+  ! zmq_socket_pull : socket on which the buffers of integrals are received.
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  integer                        :: j,l,n_integrals
+  integer                        :: rc
+
+  real(integral_kind), allocatable :: buffer_value(:)
+  integer(key_kind), allocatable :: buffer_i(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+
+  integer*8                      :: control, accu, sze
+  integer                        :: task_id, more
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  sze = ao_num*ao_num
+  allocate ( buffer_i(sze), buffer_value(sze) )
+  ! accu counts the integrals received; more is passed to zmq_delete_task and drives the loop.
+  accu = 0_8
+  more = 1
+  do while (more == 1)
+    ! Message layout: n_integrals, keys, values, task_id.
+    rc = f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)
+    if (rc == -1) then
+      n_integrals = 0
+      return
+    endif
+    if (rc /= 4) then
+      print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, n_integrals, 4, 0)'
+      stop 'error'
+    endif
+
+    if (n_integrals >= 0) then
+      ! Grow the buffers when the incoming message exceeds the current capacity.
+      if (n_integrals > sze) then
+        deallocate (buffer_value, buffer_i)
+        sze = n_integrals
+        allocate (buffer_value(sze), buffer_i(sze))
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)
+      if (rc /= key_kind*n_integrals) then
+        print *,  rc, key_kind, n_integrals
+        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_i, key_kind*n_integrals, 0)'
+        stop 'error'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)
+      if (rc /= integral_kind*n_integrals) then
+        print *, irp_here,  ': f77_zmq_recv( zmq_socket_pull, buffer_value, integral_kind*n_integrals, 0)'
+        stop 'error'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)
+      ! When ZMQ_PUSH is not defined, a 4-byte reply is sent back on the pull socket.
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+      rc = f77_zmq_send( zmq_socket_pull, 0, 4, 0)
+      if (rc /= 4) then
+        print *,  irp_here, ' : f77_zmq_send (zmq_socket_pull,...'
+        stop 'error'
+      endif
+IRP_ENDIF
+
+
+      call insert_into_ao_tc_sym_two_e_pot_map(n_integrals,buffer_i,buffer_value)
+      accu += n_integrals
+      if (task_id /= 0) then
+        integer, external :: zmq_delete_task
+        if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) == -1) then
+          stop 'Unable to delete task'
+        endif
+      endif
+    endif
+
+  enddo
+
+  deallocate( buffer_i, buffer_value )
+  ! Sanity check: the map must hold exactly as many elements as were received.
+  integer (map_size_kind) :: get_ao_tc_sym_two_e_pot_map_size
+  control = get_ao_tc_sym_two_e_pot_map_size()
+
+  if (control /= accu) then
+      print *, ''
+      print *, irp_here
+      print *, 'Control : ', control
+      print *, 'Accu    : ', accu
+      print *, 'Some integrals were lost during the parallel computation.'
+      print *, 'Try to reduce the number of threads.'
+      stop
+  endif
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+
+end
+
diff --git a/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f b/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f
new file mode 100644
index 00000000..95dc664d
--- /dev/null
+++ b/src/ao_tc_eff_map/map_integrals_eff_pot.irp.f
@@ -0,0 +1,313 @@
+use map_module
+
+!! AO Map
+!! ======
+
+BEGIN_PROVIDER [ type(map_type), ao_tc_sym_two_e_pot_map ]
+  implicit none
+  BEGIN_DOC
+  ! Map of the |AO| integrals, sized with the key of (ao_num,ao_num,ao_num,ao_num)
+  END_DOC
+  integer(map_size_kind)         :: map_sze
+  integer(key_kind)              :: last_key
+  call two_e_integrals_index(ao_num, ao_num, ao_num, ao_num, last_key)
+  map_sze = last_key
+  call map_init(ao_tc_sym_two_e_pot_map, map_sze)
+  print*,  'ao_tc_sym_two_e_pot_map map initialized : ', map_sze
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, ao_tc_sym_two_e_pot_cache_min ]
+&BEGIN_PROVIDER [ integer, ao_tc_sym_two_e_pot_cache_max ]
+ implicit none
+ BEGIN_DOC
+ ! Lowest and highest AO indices for which the integrals are stored in the cache
+ END_DOC
+ ! The cached window ends at ao_num and spans at most 64 AO indices.
+ ao_tc_sym_two_e_pot_cache_max = ao_num
+ ao_tc_sym_two_e_pot_cache_min = max(1, ao_tc_sym_two_e_pot_cache_max - 63)
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_tc_sym_two_e_pot_cache, (0:64*64*64*64) ]
+
+  use map_module
+  implicit none
+
+  BEGIN_DOC
+  ! Dense cache of the |AO| integrals whose four indices all lie between
+  ! ao_tc_sym_two_e_pot_cache_min and ao_tc_sym_two_e_pot_cache_max, for fast access
+  END_DOC
+
+  integer                        :: i, j, k, l, slot
+  integer(key_kind)              :: key
+  real(integral_kind)            :: val
+
+  PROVIDE ao_tc_sym_two_e_pot_in_map
+
+ !$OMP PARALLEL DO PRIVATE (i,j,k,l,key,slot,val)
+  do l = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+    do k = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+      do j = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+        do i = ao_tc_sym_two_e_pot_cache_min, ao_tc_sym_two_e_pot_cache_max
+          ! Cache index: offsets of (l,k,j,i) from cache_min, six bits each, l in the highest bits.
+          slot = ior( ishft(l-ao_tc_sym_two_e_pot_cache_min,18), ishft(k-ao_tc_sym_two_e_pot_cache_min,12) )
+          slot = ior( slot, ior( ishft(j-ao_tc_sym_two_e_pot_cache_min,6), i-ao_tc_sym_two_e_pot_cache_min ) )
+          !DIR$ FORCEINLINE
+          call two_e_integrals_index(i, j, k, l, key)
+          !DIR$ FORCEINLINE
+          call map_get(ao_tc_sym_two_e_pot_map, key, val)
+          ao_tc_sym_two_e_pot_cache(slot) = val
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
+
+subroutine insert_into_ao_tc_sym_two_e_pot_map(n_integrals, buffer_i, buffer_values)
+
+  use map_module
+  implicit none
+
+  BEGIN_DOC
+  ! Appends n_integrals (key, value) pairs to ao_tc_sym_two_e_pot_map through map_append
+  END_DOC
+  ! buffer_i holds the keys and buffer_values the matching integrals; both are passed inout to map_append.
+  integer, intent(in)                :: n_integrals
+  integer(key_kind), intent(inout)   :: buffer_i(n_integrals)
+  real(integral_kind), intent(inout) :: buffer_values(n_integrals)
+
+  call map_append(ao_tc_sym_two_e_pot_map, buffer_i, buffer_values, n_integrals)
+
+end
+
+! ---
+
+double precision function get_ao_tc_sym_two_e_pot(i, j, k, l, map) result(result)
+
+  use map_module
+
+  implicit none
+
+  BEGIN_DOC
+  ! Gets one |AO| two-electron integral from the |AO| map
+  !
+  ! i, j, k, l : AO indices; map : the map queried for integrals outside the cache.
+  ! When the offsets of the four indices from ao_tc_sym_two_e_pot_cache_min all lie
+  ! in [0,63] the value is read from ao_tc_sym_two_e_pot_cache; otherwise from map.
+  ! No screening is applied: the zero-integral and Schwartz tests are disabled.
+  END_DOC
+
+  integer, intent(in)            :: i,j,k,l
+  type(map_type), intent(inout)  :: map
+  integer(key_kind)              :: idx
+  integer                        :: ii
+  real(integral_kind)            :: tmp
+
+  PROVIDE ao_tc_sym_two_e_pot_in_map ao_tc_sym_two_e_pot_cache ao_tc_sym_two_e_pot_cache_min
+
+  ! OR of the four offsets from the cache lower bound. If any bit above the low
+  ! six is set, at least one offset is negative or >= 64.
+  ii = l-ao_tc_sym_two_e_pot_cache_min
+  ii = ior(ii, k-ao_tc_sym_two_e_pot_cache_min)
+  ii = ior(ii, j-ao_tc_sym_two_e_pot_cache_min)
+  ii = ior(ii, i-ao_tc_sym_two_e_pot_cache_min)
+
+  if (iand(ii, -64) /= 0) then
+    ! Not cached: compute the key and query the map.
+    !DIR$ FORCEINLINE
+    call two_e_integrals_index(i, j, k, l, idx)
+    !DIR$ FORCEINLINE
+    call map_get(map, idx, tmp)
+  else
+    ! Cached: pack the four offsets, six bits each, into the cache index.
+    ii = l-ao_tc_sym_two_e_pot_cache_min
+    ii = ior( ishft(ii,6), k-ao_tc_sym_two_e_pot_cache_min)
+    ii = ior( ishft(ii,6), j-ao_tc_sym_two_e_pot_cache_min)
+    ii = ior( ishft(ii,6), i-ao_tc_sym_two_e_pot_cache_min)
+    tmp = ao_tc_sym_two_e_pot_cache(ii)
+  endif
+
+  result = tmp
+
+end
+
+! ---
+
+subroutine get_many_ao_tc_sym_two_e_pot(j,k,l,sze,out_val)
+  use map_module
+  BEGIN_DOC
+  ! Gets multiple |AO| two-electron integrals from the |AO| map .
+  ! For j,k,l fixed, out_val(i) receives the integral (i,j,k,l) for i = 1..sze.
+  END_DOC
+  implicit none
+  integer, intent(in)            :: j,k,l, sze
+  real(integral_kind), intent(out) :: out_val(sze)
+  ! hash is declared and thresh is assigned, but neither is used further down.
+  integer                        :: i
+  integer(key_kind)              :: hash
+  double precision               :: thresh
+! logical, external              :: ao_one_e_integral_zero
+  PROVIDE ao_tc_sym_two_e_pot_in_map ao_tc_sym_two_e_pot_map
+  thresh = ao_integrals_threshold
+  ! Screening on (j,l) is disabled: the branch below is never taken.
+! if (ao_one_e_integral_zero(j,l)) then
+  if (.False.) then
+    out_val = 0.d0
+    return
+  endif
+
+  double precision :: get_ao_tc_sym_two_e_pot
+  do i=1,sze
+    out_val(i) = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map)
+  enddo
+
+end
+
+subroutine get_many_ao_tc_sym_two_e_pot_non_zero(j,k,l,sze,out_val,out_val_index,non_zero_int)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Gets multiple |AO| two-electron integrals from the |AO| map .
+  ! All non-zero i are retrieved for j,k,l fixed.
+  !
+  ! For i = 1..sze the integral (i,j,k,l) is read directly from ao_tc_sym_two_e_pot_map
+  ! and kept when its absolute value is >= ao_integrals_threshold.
+  !
+  ! out_val(1:non_zero_int)       : retained integrals
+  ! out_val_index(1:non_zero_int) : corresponding values of i
+  ! non_zero_int                  : number of retained integrals
+  !
+  ! Entries beyond non_zero_int in out_val and out_val_index are left unset.
+  END_DOC
+  integer, intent(in)            :: j,k,l, sze
+  real(integral_kind), intent(out) :: out_val(sze)
+  integer, intent(out)           :: out_val_index(sze),non_zero_int
+
+  integer                        :: i
+  integer(key_kind)              :: hash
+  double precision               :: thresh
+  ! Same kind as the value handed to map_get in get_ao_tc_sym_two_e_pot.
+  real(integral_kind)            :: tmp
+
+  PROVIDE ao_tc_sym_two_e_pot_in_map
+  thresh = ao_integrals_threshold
+
+  ! No screening on (j,l) and no Schwartz bound is applied: every i is looked up.
+  non_zero_int = 0
+  do i=1,sze
+    !DIR$ FORCEINLINE
+    call two_e_integrals_index(i,j,k,l,hash)
+    call map_get(ao_tc_sym_two_e_pot_map, hash,tmp)
+    if (abs(tmp) < thresh ) cycle
+    non_zero_int = non_zero_int+1
+    out_val_index(non_zero_int) = i
+    out_val(non_zero_int) = tmp
+  enddo
+
+end
+
+
+function get_ao_tc_sym_two_e_pot_map_size()
+  implicit none
+  integer (map_size_kind) :: get_ao_tc_sym_two_e_pot_map_size
+  BEGIN_DOC
+  ! Returns ao_tc_sym_two_e_pot_map % n_elements, the number of elements in the |AO| map (takes no argument)
+  END_DOC
+  get_ao_tc_sym_two_e_pot_map_size = ao_tc_sym_two_e_pot_map % n_elements
+end
+
+subroutine clear_ao_tc_sym_two_e_pot_map
+  implicit none
+  BEGIN_DOC
+  ! Frees the memory of the |AO| map: calls map_deinit on it, then FREEs the ao_tc_sym_two_e_pot_map provider
+  END_DOC
+  call map_deinit(ao_tc_sym_two_e_pot_map)
+  FREE ao_tc_sym_two_e_pot_map
+end
+
+
+
+subroutine dump_ao_tc_sym_two_e_pot(filename)
+  use map_module
+  implicit none
+  BEGIN_DOC
+  ! Save to disk the |AO| eff_pot integrals (unformatted): kinds, map header, one header per sub-map, then keys and values
+  END_DOC
+  character*(*), intent(in)      :: filename
+  integer(cache_key_kind), pointer :: key(:)
+  real(integral_kind), pointer   :: val(:)
+  integer*8                      :: i,j, n
+  call ezfio_set_work_empty(.False.)
+  open(unit=66,file=filename,FORM='unformatted')
+  write(66) integral_kind, key_kind
+  write(66) ao_tc_sym_two_e_pot_map%sorted, ao_tc_sym_two_e_pot_map%map_size,    &
+      ao_tc_sym_two_e_pot_map%n_elements
+  do i=0_8,ao_tc_sym_two_e_pot_map%map_size
+    write(66) ao_tc_sym_two_e_pot_map%map(i)%sorted, ao_tc_sym_two_e_pot_map%map(i)%map_size,&
+        ao_tc_sym_two_e_pot_map%map(i)%n_elements
+  enddo
+  do i=0_8,ao_tc_sym_two_e_pot_map%map_size
+    key => ao_tc_sym_two_e_pot_map%map(i)%key
+    val => ao_tc_sym_two_e_pot_map%map(i)%value
+    n = ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    write(66) (key(j), j=1,n), (val(j), j=1,n)
+  enddo
+  close(66)
+
+end
+
+
+
+integer function load_ao_tc_sym_two_e_pot(filename)
+  implicit none
+  BEGIN_DOC
+  ! Read from disk the |AO| eff_pot integrals; returns 0 on success, any read error stops the program
+  END_DOC
+  character*(*), intent(in)      :: filename
+  integer*8                      :: i
+  integer(cache_key_kind), pointer :: key(:)
+  real(integral_kind), pointer   :: val(:)
+  integer                        :: iknd, kknd
+  integer*8                      :: n, j
+  load_ao_tc_sym_two_e_pot = 1
+  open(unit=66,file=filename,FORM='unformatted',STATUS='UNKNOWN')
+  read(66,err=98,end=98) iknd, kknd
+  if (iknd /= integral_kind) then
+    print *,  'Wrong integrals kind in file :', iknd
+    stop 1
+  endif
+  if (kknd /= key_kind) then
+    print *,  'Wrong key kind in file :', kknd
+    stop 1
+  endif
+  read(66,err=98,end=98) ao_tc_sym_two_e_pot_map%sorted, ao_tc_sym_two_e_pot_map%map_size,&
+      ao_tc_sym_two_e_pot_map%n_elements
+  do i=0_8, ao_tc_sym_two_e_pot_map%map_size
+    read(66,err=99,end=99) ao_tc_sym_two_e_pot_map%map(i)%sorted,          &
+        ao_tc_sym_two_e_pot_map%map(i)%map_size, ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    call cache_map_reallocate(ao_tc_sym_two_e_pot_map%map(i),ao_tc_sym_two_e_pot_map%map(i)%map_size)
+  enddo
+  do i=0_8, ao_tc_sym_two_e_pot_map%map_size
+    key => ao_tc_sym_two_e_pot_map%map(i)%key
+    val => ao_tc_sym_two_e_pot_map%map(i)%value
+    n = ao_tc_sym_two_e_pot_map%map(i)%n_elements
+    read(66,err=99,end=99) (key(j), j=1,n), (val(j), j=1,n)
+  enddo
+  close(66)
+  call map_sort(ao_tc_sym_two_e_pot_map)
+  load_ao_tc_sym_two_e_pot = 0
+  return
+  99 continue
+  call map_deinit(ao_tc_sym_two_e_pot_map)
+  98 continue
+  stop 'Problem reading ao_tc_sym_two_e_pot_map file in work/'
+end
+
+
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f
new file mode 100644
index 00000000..50c396de
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_grad2.irp.f
@@ -0,0 +1,332 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_hermII, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !
+  !  :math:`\langle \chi_A | -0.5 \grad \tau_{1b} \cdot \grad \tau_{1b} | \chi_B \rangle` 
+  !  Only j1b_type = 1 and j1b_type = 2 are handled; for any other value the matrix stays zero.
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k1, k2, l, m
+  double precision :: alpha, beta, gama1, gama2, coef1, coef2
+  double precision :: A_center(3), B_center(3), C_center1(3), C_center2(3)
+  double precision :: c1, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_4G
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+  ! Start from zero: each branch below accumulates the primitive contributions into the matrix.
+  j1b_gauss_hermII(1:ao_num,1:ao_num) = 0.d0
+
+  if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k1, k2, l, m, alpha, beta, gama1, gama2,  &
+ !$OMP          A_center, B_center, C_center1, C_center2,       &
+ !$OMP          power_A, power_B, num_A, num_B, c1, c)          &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermII)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k1 = 1, nucl_num
+              gama1          = j1b_pen(k1)
+              C_center1(1:3) = nucl_coord(k1,1:3)
+  
+              do k2 = 1, nucl_num
+                gama2          = j1b_pen(k2)
+                C_center2(1:3) = nucl_coord(k2,1:3)
+  
+                ! < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+                c1 = int_gauss_4G( A_center, B_center, C_center1, C_center2     &
+                                 , power_A, power_B, alpha, beta, gama1, gama2  )
+  
+                c = c - 2.d0 * gama1 * gama2 * c1
+              enddo
+            enddo
+  
+            j1b_gauss_hermII(i,j) = j1b_gauss_hermII(i,j)      & 
+                      + ao_coef_normalized_ordered_transp(l,j) &
+                      * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k1, k2, l, m, alpha, beta, gama1, gama2,  &
+ !$OMP          A_center, B_center, C_center1, C_center2,       &
+ !$OMP          power_A, power_B, num_A, num_B, c1, c,          &
+ !$OMP          coef1, coef2)                                   &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermII,             &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k1 = 1, nucl_num
+              gama1          = j1b_pen  (k1)
+              coef1          = j1b_coeff(k1)
+              C_center1(1:3) = nucl_coord(k1,1:3)
+  
+              do k2 = 1, nucl_num
+                gama2          = j1b_pen  (k2)
+                coef2          = j1b_coeff(k2)
+                C_center2(1:3) = nucl_coord(k2,1:3)
+  
+                ! < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+                c1 = int_gauss_4G( A_center, B_center, C_center1, C_center2     &
+                                 , power_A, power_B, alpha, beta, gama1, gama2  )
+  
+                c = c - 2.d0 * gama1 * gama2 * coef1 * coef2 * c1
+              enddo
+            enddo
+  
+            j1b_gauss_hermII(i,j) = j1b_gauss_hermII(i,j)      & 
+                      + ao_coef_normalized_ordered_transp(l,j) &
+                      * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!               < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+!
+double precision function int_gauss_4G( A_center, B_center, C_center1, C_center2, power_A, power_B &
+                                      , alpha, beta, gama1, gama2 )
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  BEGIN_DOC
+  ! < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] r_C1 \cdot r_C2 | XB >
+  !
+  ! The Gaussians on C_center1 and C_center2 are merged by gaussian_product into one
+  ! Gaussian (gama, C_center, prefactor fact_C), and each Cartesian factor is expanded as
+  !
+  !   (x - x_C1) (x - x_C2) = (x - x_C)^2 + c_tmp1 (x - x_C) + c_tmp2
+  !
+  ! The power-0 overlaps are summed once in cx0, cy0, cz0; they serve both as the
+  ! spectator directions and as the c_tmp2 term of the direction being expanded.
+  END_DOC
+
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center1(3), C_center2(3)
+  double precision, intent(in) :: alpha, beta, gama1, gama2
+
+  integer                      :: i, dim1, power_C
+  integer                      :: iorder(3)
+  double precision             :: AB_expo, fact_AB, AB_center(3), P_AB(0:max_dim,3)
+  double precision             :: gama, fact_C, C_center(3)
+  double precision             :: cx0, cy0, cz0, c_tmp1, c_tmp2, cx, cy, cz
+  double precision             :: int_tmp
+
+  double precision             :: overlap_gaussian_x
+
+  dim1 = 100
+
+  ! P_AB(0:max_dim,3) polynomial
+  ! AB_center(3)      new center
+  ! AB_expo           new exponent
+  ! fact_AB           constant factor
+  ! iorder(3)         i_order(i) = order of the polynomials
+  call give_explicit_poly_and_gaussian( P_AB, AB_center, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_B, A_center, B_center, dim1)
+
+  call gaussian_product(gama1, C_center1, gama2, C_center2, fact_C, gama, C_center)
+
+  ! <<<
+  ! power-0 overlaps, evaluated once per direction and reused below
+  power_C = 0
+
+  cx0 = 0.d0
+  do i = 0, iorder(1)
+    cx0 = cx0 + P_AB(i,1) * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cy0 = 0.d0
+  do i = 0, iorder(2)
+    cy0 = cy0 + P_AB(i,2) * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  cz0 = 0.d0
+  do i = 0, iorder(3)
+    cz0 = cz0 + P_AB(i,3) * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+  ! >>>
+
+  int_tmp = 0.d0
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! x term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (x - x_C1) (x - x_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(1) - C_center1(1) - C_center2(1)
+  c_tmp2 = ( C_center(1) - C_center1(1) ) * ( C_center(1) - C_center2(1) ) 
+
+  ! < XA | exp[-gama r_C^2] | XB > : already summed in cx0
+  cx = c_tmp2 * cx0
+  do i = 0, iorder(1)
+
+    ! < XA | exp[-gama r_C^2] (x - x_C)^2 | XB >
+    power_C = 2
+    cx      = cx + P_AB(i,1) &
+            * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (x - x_C) | XB >
+    power_C = 1
+    cx      = cx + P_AB(i,1) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx * cy0 * cz0
+
+  ! -----------------------------------------------------------------------------------------------
+
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! y term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (y - y_C1) (y - y_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(2) - C_center1(2) - C_center2(2)
+  c_tmp2 = ( C_center(2) - C_center1(2) ) * ( C_center(2) - C_center2(2) ) 
+
+  ! < XA | exp[-gama r_C^2] | XB > : already summed in cy0
+  cy = c_tmp2 * cy0
+  do i = 0, iorder(2)
+
+    ! < XA | exp[-gama r_C^2] (y - y_C)^2 | XB >
+    power_C = 2
+    cy      = cy + P_AB(i,2) &
+            * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (y - y_C) | XB >
+    power_C = 1
+    cy      = cy + P_AB(i,2) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx0 * cy * cz0
+
+  ! -----------------------------------------------------------------------------------------------
+
+
+  ! -----------------------------------------------------------------------------------------------
+  !
+  ! z term:
+  !          < XA | exp[-gama1 r_C1^2 -gama2 r_C2^2] (z - z_C1) (z - z_C2) | XB > 
+  !
+
+  c_tmp1 = 2.d0 * C_center(3) - C_center1(3) - C_center2(3)
+  c_tmp2 = ( C_center(3) - C_center1(3) ) * ( C_center(3) - C_center2(3) ) 
+
+  ! < XA | exp[-gama r_C^2] | XB > : already summed in cz0
+  cz = c_tmp2 * cz0
+  do i = 0, iorder(3)
+
+    ! < XA | exp[-gama r_C^2] (z - z_C)^2 | XB >
+    power_C = 2
+    cz      = cz + P_AB(i,3) &
+            * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+
+    ! < XA | exp[-gama r_C^2] (z - z_C) | XB >
+    power_C = 1
+    cz      = cz + P_AB(i,3) * c_tmp1 &
+            * overlap_gaussian_x( AB_center(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+
+  enddo
+
+  int_tmp += cx0 * cy0 * cz
+
+  ! -----------------------------------------------------------------------------------------------
+
+  int_gauss_4G = fact_AB * fact_C * int_tmp
+
+  return
+end function int_gauss_4G
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f
new file mode 100644
index 00000000..0a0b7610
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_lap.irp.f
@@ -0,0 +1,303 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_hermI, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  ! j1b_gauss_hermI(i,j) = contraction over the primitives of AOs j (A) and i (B) of
+  !  :math:`\langle \chi_A | -0.5 \Delta \tau_{1b} | \chi_B \rangle` 
+  ! with \tau_{1b} selected by j1b_type (1 or 2); any other j1b_type leaves the array at zero.
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k, l, m
+  double precision :: alpha, beta, gama, coef
+  double precision :: A_center(3), B_center(3), C_center(3)
+  double precision :: c1, c2, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_r0, int_gauss_r2
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything (its outputs are not used below)
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+  j1b_gauss_hermI(1:ao_num,1:ao_num) = 0.d0
+
+  if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama,               &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c2, c)                        &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermI)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              ! < XA | exp[-gama r_C^2] | XB >
+              c1 = int_gauss_r0( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              ! < XA | r_C^2 exp[-gama r_C^2] | XB >
+              c2 = int_gauss_r2( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+              ! -0.5 \Delta exp[-gama r_C^2] = (3 gama - 2 gama^2 r_C^2) exp[-gama r_C^2]
+              c = c + 3.d0 * gama * c1 - 2.d0 * gama * gama * c2
+            enddo
+  
+            j1b_gauss_hermI(i,j) = j1b_gauss_hermI(i,j)      & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama, coef,         &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c2, c)                        &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_hermI,              &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen  (k)
+              coef          = j1b_coeff(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              ! < XA | exp[-gama r_C^2] | XB >
+              c1 = int_gauss_r0( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+  
+              ! < XA | r_C^2 exp[-gama r_C^2] | XB >
+              c2 = int_gauss_r2( A_center, B_center, C_center        &
+                               , power_A, power_B, alpha, beta, gama )
+              ! same combination as for j1b_type 1, weighted by the coefficient of nucleus k
+              c = c + 3.d0 * gama * coef * c1 - 2.d0 * gama * gama * coef * c2
+            enddo
+  
+            j1b_gauss_hermI(i,j) = j1b_gauss_hermI(i,j)      & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+!_____________________________________________________________________________________________________________
+!
+!                             < XA | exp[-gama r_C^2] | XB >
+!
+double precision function int_gauss_r0(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+
+  ! < XA | exp[-gama r_C^2] | XB > for two primitive Cartesian Gaussians:
+  !   XA : center A_center, exponent alpha, powers power_A(3)
+  !   XB : center B_center, exponent beta , powers power_B(3)
+  ! The Gaussian exp[-gama r_C^2] is centered on C_center.
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: i, d, power_C, dim1
+  integer                      :: iorder(3)
+  double precision             :: AB_expo, fact_AB, AB_center(3), P_AB(0:max_dim,3)
+  double precision             :: c_1d(3)
+
+  double precision             :: overlap_gaussian_x 
+
+  dim1 = 100
+
+  ! P_AB(0:max_dim,3) polynomial
+  ! AB_center(3)      new center
+  ! AB_expo           new exponent
+  ! fact_AB           constant factor
+  ! iorder(3)         i_order(i) = order of the polynomials
+  call give_explicit_poly_and_gaussian( P_AB, AB_center, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_B, A_center, B_center, dim1)
+
+  ! skip products whose constant factor is below 1d-20
+  if( fact_AB .lt. 1d-20 ) then
+    int_gauss_r0 = 0.d0
+    return
+  endif
+
+  ! one accumulation per Cartesian direction d, no polynomial on the C Gaussian
+  power_C = 0
+  do d = 1, 3
+    c_1d(d) = 0.d0
+    do i = 0, iorder(d)
+      c_1d(d) = c_1d(d) + P_AB(i,d) * overlap_gaussian_x(AB_center(d), C_center(d), AB_expo, gama, i, power_C, dim1)
+    enddo
+  enddo
+
+  int_gauss_r0 = fact_AB * c_1d(1) * c_1d(2) * c_1d(3)
+
+  return
+end function int_gauss_r0 
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!                             < XA | r_C^2 exp[-gama r_C^2] | XB >
+!
+double precision function int_gauss_r2(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+
+  ! < XA | r_C^2 exp[-gama r_C^2] | XB > for two primitive Cartesian Gaussians
+  !   XA : center A_center, exponent alpha, powers power_A(3)
+  !   XB : center B_center, exponent beta , powers power_B(3)
+  ! with r_C^2 = (x - XC)^2 + (y - YC)^2 + (z - ZC)^2 measured from C_center.
+  !
+  ! Each of the three terms is a product of three accumulated sums: power 2 on the
+  ! C Gaussian along one direction and power 0 along the two others.
+
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  integer,          intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: ip, idir, pow_c, ndim
+  integer                      :: n_pol(3)
+  double precision             :: expo_p, fact_p, center_p(3), pol_p(0:max_dim,3)
+  double precision             :: ovlp0(3), ovlp2(3)
+  double precision             :: acc
+
+  double precision             :: overlap_gaussian_x
+
+  ndim = 100
+
+  ! Outputs of give_explicit_poly_and_gaussian:
+  !   pol_p(0:max_dim,3)  polynomial centered on center_p
+  !   center_p(3)         new center
+  !   expo_p              new exponent
+  !   fact_p              constant factor
+  !   n_pol(3)            order of the polynomials
+  call give_explicit_poly_and_gaussian( pol_p, center_p, expo_p, fact_p &
+                                      , n_pol, alpha, beta, power_A, power_B, A_center, B_center, ndim)
+
+  ! <<<
+  ! power-0 sums, computed once and reused in all three terms
+  pow_c = 0
+  do idir = 1, 3
+    ovlp0(idir) = 0.d0
+    do ip = 0, n_pol(idir)
+      ovlp0(idir) = ovlp0(idir) + pol_p(ip,idir) &
+                  * overlap_gaussian_x(center_p(idir), C_center(idir), expo_p, gama, ip, pow_c, ndim)
+    enddo
+  enddo
+  ! >>>
+
+  ! power-2 sums, one per direction
+  pow_c = 2
+  do idir = 1, 3
+    ovlp2(idir) = 0.d0
+    do ip = 0, n_pol(idir)
+      ovlp2(idir) = ovlp2(idir) + pol_p(ip,idir) &
+                  * overlap_gaussian_x(center_p(idir), C_center(idir), expo_p, gama, ip, pow_c, ndim)
+    enddo
+  enddo
+
+  acc = 0.d0
+
+  ! ( x - XC)^2
+  acc = acc + ovlp2(1) * ovlp0(2) * ovlp0(3)
+
+  ! ( y - YC)^2
+  acc = acc + ovlp0(1) * ovlp2(2) * ovlp0(3)
+
+  ! ( z - ZC)^2
+  acc = acc + ovlp0(1) * ovlp0(2) * ovlp2(3)
+
+  ! apply the constant factor returned with the polynomial
+  int_gauss_r2 = fact_p * acc
+
+  return
+end function int_gauss_r2
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f b/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f
new file mode 100644
index 00000000..bd881d32
--- /dev/null
+++ b/src/ao_tc_eff_map/one_e_1bgauss_nonherm.irp.f
@@ -0,0 +1,371 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_gauss_nonherm, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  ! Contraction over AO primitives, summed over the nuclei, of
+  !  j1b_gauss_nonherm(i,j) = \langle \chi_j | - grad \tau_{1b} \cdot grad | \chi_i \rangle  
+  ! with \tau_{1b} selected by j1b_type (1 or 2); any other j1b_type leaves the array at zero.
+  END_DOC
+
+  implicit none
+
+  integer          :: num_A, num_B
+  integer          :: power_A(3), power_B(3)
+  integer          :: i, j, k, l, m
+  double precision :: alpha, beta, gama, coef
+  double precision :: A_center(3), B_center(3), C_center(3)
+  double precision :: c1, c
+
+  integer          :: dim1
+  double precision :: overlap_y, d_a_2, overlap_z, overlap
+
+  double precision :: int_gauss_deriv
+
+  PROVIDE j1b_type j1b_pen j1b_coeff
+
+  ! --------------------------------------------------------------------------------
+  ! -- Dummy call to provide everything (its outputs are not used below)
+  dim1        = 100
+  A_center(:) = 0.d0
+  B_center(:) = 1.d0
+  alpha       = 1.d0
+  beta        = 0.1d0
+  power_A(:)  = 1
+  power_B(:)  = 0
+  call overlap_gaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                           , overlap_y, d_a_2, overlap_z, overlap, dim1 )
+  ! --------------------------------------------------------------------------------
+  
+
+  j1b_gauss_nonherm(1:ao_num,1:ao_num) = 0.d0
+
+   if(j1b_type .eq. 1) then
+  ! \tau_1b = \sum_iA -[1 - exp(-alpha_A r_iA^2)] 
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama,               &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c)                            &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_nonherm)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              !  \langle \chi_A | exp[-gama r_C^2] r_C \cdot grad | \chi_B \rangle
+              c1 = int_gauss_deriv( A_center, B_center, C_center        &
+                                  , power_A, power_B, alpha, beta, gama )
+              ! - grad exp[-gama r_C^2] = 2 gama r_C exp[-gama r_C^2]
+              c = c + 2.d0 * gama * c1 
+            enddo
+  
+            j1b_gauss_nonherm(i,j) =  j1b_gauss_nonherm(i,j) & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  elseif(j1b_type .eq. 2) then
+  ! \tau_1b = \sum_iA [c_A exp(-alpha_A r_iA^2)]
+
+ !$OMP PARALLEL                                                 &
+ !$OMP DEFAULT (NONE)                                           &
+ !$OMP PRIVATE (i, j, k, l, m, alpha, beta, gama, coef,         &
+ !$OMP          A_center, B_center, C_center, power_A, power_B, &
+ !$OMP          num_A, num_B, c1, c)                            &
+ !$OMP SHARED (ao_num, ao_prim_num, ao_expo_ordered_transp,     & 
+ !$OMP         ao_power, ao_nucl, nucl_coord,                   &
+ !$OMP         ao_coef_normalized_ordered_transp,               &
+ !$OMP         nucl_num, j1b_pen, j1b_gauss_nonherm,            &
+ !$OMP         j1b_coeff)
+ !$OMP DO SCHEDULE (dynamic)
+    do j = 1, ao_num
+      num_A         = ao_nucl(j)
+      power_A(1:3)  = ao_power(j,1:3)
+      A_center(1:3) = nucl_coord(num_A,1:3)
+  
+      do i = 1, ao_num
+        num_B         = ao_nucl(i)
+        power_B(1:3)  = ao_power(i,1:3)
+        B_center(1:3) = nucl_coord(num_B,1:3)
+  
+        do l = 1, ao_prim_num(j)
+          alpha = ao_expo_ordered_transp(l,j)
+  
+          do m = 1, ao_prim_num(i)
+            beta = ao_expo_ordered_transp(m,i)
+  
+            c = 0.d0
+            do k = 1, nucl_num
+              gama          = j1b_pen  (k)
+              coef          = j1b_coeff(k)
+              C_center(1:3) = nucl_coord(k,1:3)
+  
+              !  \langle \chi_A | exp[-gama r_C^2] r_C \cdot grad | \chi_B \rangle
+              c1 = int_gauss_deriv( A_center, B_center, C_center        &
+                                  , power_A, power_B, alpha, beta, gama )
+              ! same factor as for j1b_type 1, weighted by the coefficient of nucleus k
+              c = c + 2.d0 * gama * coef * c1 
+            enddo
+  
+            j1b_gauss_nonherm(i,j) =  j1b_gauss_nonherm(i,j) & 
+                    + ao_coef_normalized_ordered_transp(l,j) &
+                    * ao_coef_normalized_ordered_transp(m,i) * c
+          enddo
+        enddo
+      enddo
+    enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  endif
+
+END_PROVIDER
+
+
+
+
+
+!_____________________________________________________________________________________________________________
+!
+!                            < XA | exp[-gama r_C^2] r_C \cdot grad | XB >
+!
+double precision function int_gauss_deriv(A_center, B_center, C_center, power_A, power_B, alpha, beta, gama)
+  ! Sum over q = x, y, z of < XA | exp[-gama r_C^2] (q - q_C) d/dq | XB >, with d/dq XB expanded analytically.
+  ! for max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  integer         , intent(in) :: power_A(3), power_B(3)
+  double precision, intent(in) :: alpha, beta, gama 
+
+  integer                      :: i, power_C, dim1
+  integer                      :: iorder(3), power_D(3)
+  double precision             :: AB_expo
+  double precision             :: fact_AB, center_AB(3), pol_AB(0:max_dim,3)
+  double precision             :: cx, cy, cz
+
+  double precision             :: overlap_gaussian_x
+
+  dim1 = 100
+
+  int_gauss_deriv = 0.d0
+
+  ! ===============
+  ! term I:
+  !     \partial_x
+  ! ===============
+  ! part 1: power_B(1) * (power of XB along x lowered by one); absent when power_B(1) = 0
+  if( power_B(1) .ge. 1 ) then
+
+    power_D(1) = power_B(1) - 1
+    power_D(2) = power_B(2)
+    power_D(3) = power_B(3)
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 1
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(1)) * cx * cy * cz
+  endif
+
+  ! part 2: -2 beta * (power of XB along x raised by one)
+
+  power_D(1) = power_B(1) + 1
+  power_D(2) = power_B(2)
+  power_D(3) = power_B(3)
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 1
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+
+  ! ===============
+  ! term II:
+  !     \partial_y
+  ! ===============
+  ! part 1: power_B(2) * (power of XB along y lowered by one); absent when power_B(2) = 0
+  if( power_B(2) .ge. 1 ) then
+
+    power_D(1) = power_B(1) 
+    power_D(2) = power_B(2) - 1
+    power_D(3) = power_B(3)
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 0
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 1
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(2)) * cx * cy * cz
+  endif
+
+  ! part 2: -2 beta * (power of XB along y raised by one)
+
+  power_D(1) = power_B(1) 
+  power_D(2) = power_B(2) + 1
+  power_D(3) = power_B(3)
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 0
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 1
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+  ! ===============
+  ! term III:
+  !     \partial_z
+  ! ===============
+  ! part 1: power_B(3) * (power of XB along z lowered by one); absent when power_B(3) = 0
+  if( power_B(3) .ge. 1 ) then
+
+    power_D(1) = power_B(1) 
+    power_D(2) = power_B(2) 
+    power_D(3) = power_B(3) - 1
+
+    call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                        , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+    power_C = 0
+    cx = 0.d0
+    do i = 0, iorder(1)
+      cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 0
+    cy = 0.d0
+    do i = 0, iorder(2)
+      cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+    enddo
+    power_C = 1
+    cz = 0.d0
+    do i = 0, iorder(3)
+      cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+    enddo
+
+    int_gauss_deriv = int_gauss_deriv + fact_AB * dble(power_B(3)) * cx * cy * cz
+  endif
+
+  ! part 2: -2 beta * (power of XB along z raised by one)
+
+  power_D(1) = power_B(1) 
+  power_D(2) = power_B(2)
+  power_D(3) = power_B(3) + 1
+
+  call give_explicit_poly_and_gaussian( pol_AB, center_AB, AB_expo, fact_AB &
+                                      , iorder, alpha, beta, power_A, power_D, A_center, B_center, dim1)
+  power_C = 0
+  cx = 0.d0
+  do i = 0, iorder(1)
+    cx = cx + pol_AB(i,1) * overlap_gaussian_x( center_AB(1), C_center(1), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 0
+  cy = 0.d0
+  do i = 0, iorder(2)
+    cy = cy + pol_AB(i,2) * overlap_gaussian_x( center_AB(2), C_center(2), AB_expo, gama, i, power_C, dim1)
+  enddo
+  power_C = 1
+  cz = 0.d0
+  do i = 0, iorder(3)
+    cz = cz + pol_AB(i,3) * overlap_gaussian_x( center_AB(3), C_center(3), AB_expo, gama, i, power_C, dim1)
+  enddo
+
+  int_gauss_deriv = int_gauss_deriv - 2.d0 * beta * fact_AB * cx * cy * cz
+
+  ! ===============
+  ! ===============
+
+  return
+end function int_gauss_deriv
+!_____________________________________________________________________________________________________________
+!_____________________________________________________________________________________________________________
+
+
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
new file mode 100644
index 00000000..5b72b567
--- /dev/null
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -0,0 +1,335 @@
+! ---
+
+BEGIN_PROVIDER [integer, n_gauss_eff_pot]
+
+  BEGIN_DOC
+  ! number of gaussians to represent the effective potential :
+  !
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + mu/\sqrt(pi) * exp(-(mu*r12)^2)
+  !
+  ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  ! ng_fit_jast Gaussians for the fitted part + 1 for the analytical Gaussian term
+  n_gauss_eff_pot = ng_fit_jast + 1
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv]
+
+  BEGIN_DOC
+  ! Number of Gaussians (= ng_fit_jast) when V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  n_gauss_eff_pot_deriv = ng_fit_jast
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)]
+&BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)]
+
+  BEGIN_DOC
+  ! Exponents and coefficients of the Gaussian expansion of
+  !
+  ! V(X) = -0.25 * (1 - erf(mu*X))^2 + mu/\sqrt(pi) * exp(-(mu*X)^2)
+  !      \approx \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
+  !
+  ! The first ng_fit_jast terms come from the fit of (1 - erf(mu*X))^2, the last one is analytical.
+  !
+  ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+
+  ! fitted part: -1/4 * (1 - erf(mu*x))^2 with ng_fit_jast gaussians
+  expo_gauss_eff_pot(1:ng_fit_jast) = expo_gauss_1_erf_x_2(1:ng_fit_jast)
+  coef_gauss_eff_pot(1:ng_fit_jast) = -0.25d0 * coef_gauss_1_erf_x_2(1:ng_fit_jast)
+
+  ! analytical Gaussian part: + mu/\sqrt(pi) * exp(-(mu*x)^2)
+  expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf
+  coef_gauss_eff_pot(ng_fit_jast+1) = mu_erf * inv_sq_pi
+
+END_PROVIDER 
+
+! ---
+
+double precision function eff_pot_gauss(x, mu)
+
+  BEGIN_DOC
+  ! V(mu,x) = mu/\sqrt(pi) * exp(-(mu*x)^2) - 0.25 * (1 - erf(mu*x))^2
+  END_DOC
+
+  implicit none
+  double precision, intent(in) :: x, mu
+
+  eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2
+
+end
+
+! -------------------------------------------------------------------------------------------------
+! ---
+
+double precision function eff_pot_fit_gauss(x)
+ implicit none
+ BEGIN_DOC
+ ! erf(mu_erf*x)/x + \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * x^2)
+ END_DOC
+ double precision, intent(in) :: x
+ integer :: i
+ if(dabs(x) .gt. 0.d0) then
+  eff_pot_fit_gauss = derf(mu_erf*x)/x
+ else
+  eff_pot_fit_gauss = 2.d0 * mu_erf / dsqrt(dacos(-1.d0)) ! limit of erf(mu_erf*x)/x for x -> 0, avoids 0/0
+ endif
+ do i = 1, n_gauss_eff_pot
+  eff_pot_fit_gauss += coef_gauss_eff_pot(i) * dexp(-expo_gauss_eff_pot(i)*x*x)
+ enddo
+end
+
+BEGIN_PROVIDER [integer, n_fit_1_erf_x]
+ implicit none
+ BEGIN_DOC
+! Number of parameters stored in expos_slat_gauss_1_erf_x (dimension of that array)
+ END_DOC
+ n_fit_1_erf_x = 2
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [double precision, expos_slat_gauss_1_erf_x, (n_fit_1_erf_x)]
+ implicit none
+ BEGIN_DOC
+! Parameters of the fit of 1 - erf(mu*x) by a Slater times a gaussian, Eq.A15 of  JCP 154, 084119 (2021)
+!
+! 1 - erf(mu*x) \approx e^{-expos_slat_gauss_1_erf_x(1) * mu *x} * e^{-expos_slat_gauss_1_erf_x(2) * mu^2 * x^2}
+ END_DOC
+ ! element 1 multiplies mu*x, element 2 multiplies mu^2*x^2
+ expos_slat_gauss_1_erf_x(1:2) = (/ 1.09529d0, 0.756023d0 /)
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x, (n_max_fit_slat)]
+&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x, (n_max_fit_slat)]
+
+  BEGIN_DOC
+  !
+  ! (1 - erf(mu*x)) \approx \sum_i coef_gauss_1_erf_x(i) * exp(-expo_gauss_1_erf_x(i) * x^2)
+  !
+  ! Starting point: fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2)
+  !
+  ! The slater function exp(-alpha * x) is then fitted with n_max_fit_slat gaussians
+  !
+  ! See Appendix 2 of JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  double precision :: slat_expo, gauss_shift
+  double precision :: fit_expos(n_max_fit_slat)
+
+  ! Slater and Gaussian exponents of the two-parameter fit for the current mu_erf
+  slat_expo   = expos_slat_gauss_1_erf_x(1) * mu_erf
+  gauss_shift = expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf
+
+  ! exponents returned for the Slater part; each one is shifted by the Gaussian exponent
+  call expo_fit_slater_gam(slat_expo, fit_expos)
+
+  expo_gauss_1_erf_x(1:n_max_fit_slat) = fit_expos(1:n_max_fit_slat) + gauss_shift
+  coef_gauss_1_erf_x(1:n_max_fit_slat) = coef_fit_slat_gauss(1:n_max_fit_slat)
+
+END_PROVIDER 
+
+! ---
+
+double precision function fit_1_erf_x(x)
+
+  BEGIN_DOC
+  ! Gaussian fit of (1 - erf(mu*x)) evaluated at x:
+  ! fit_1_erf_x(x) = \sum_i coef_gauss_1_erf_x(i) * exp(-expo_gauss_1_erf_x(i) * x^2)
+  END_DOC
+  implicit none
+  double precision, intent(in) :: x
+  integer :: ig
+  double precision :: acc
+  acc = 0.d0
+  do ig = 1, n_max_fit_slat
+    acc = acc + coef_gauss_1_erf_x(ig) * dexp(-expo_gauss_1_erf_x(ig) *x*x)
+  enddo
+  fit_1_erf_x = acc
+end
+
+! ---
+
+ BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (ng_fit_jast)]
+&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (ng_fit_jast)]
+
+  BEGIN_DOC
+  ! (1 - erf(mu*x))^2 \approx \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2)
+  !
+  ! ng_fit_jast = 1,2,3,5,6,7,8 : tabulated coefficients; the tabulated exponents are multiplied by mu_erf^2
+  !
+  ! ng_fit_jast = 20 : fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2), slater function fitted with n_max_fit_slat gaussians 
+  END_DOC
+
+  implicit none
+  integer          :: i
+  double precision :: expos(ng_fit_jast), alpha, beta, tmp
+
+  if(ng_fit_jast .eq. 1) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.85345277d0 /)
+    expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 2) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.31030624d0 , 0.64364964d0 /)
+    expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 3) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.33206082d0 , 0.52347449d0, 0.12605012d0   /)
+    expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 5) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.02956716d0, 0.17025555d0, 0.32774114d0, 0.39034764d0, 0.07822781d0 /)
+    expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 6) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.18331042d0  , 0.10971118d0  , 0.29949169d0  , 0.34853132d0  , 0.0394275d0   , 0.01874444d0   /)
+    expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.0213619d0   , 0.03221511d0  , 0.29966689d0  , 0.19178934d0  , 0.06154732d0  , 0.28214555d0  , 0.11125985d0   /)
+    expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.28189124d0  , 0.19518669d0  , 0.12161735d0  , 0.24257438d0  , 0.07309656d0  , 0.042435d0    , 0.01926109d0  , 0.02393415d0   /)
+    expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_1_erf_x_2 = (/  /)
+  !  expo_gauss_1_erf_x_2 = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+  !  enddo
+
+  elseif(ng_fit_jast .eq. 20) then
+    ! the loop below fills n_max_fit_slat entries of arrays dimensioned (ng_fit_jast)
+    ASSERT(n_max_fit_slat == 20)
+    ! squaring the Slater * Gaussian form doubles both of its exponents
+    alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf
+    call expo_fit_slater_gam(alpha, expos)
+    beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf
+    do i = 1, n_max_fit_slat
+      expo_gauss_1_erf_x_2(i) = expos(i) + beta
+      coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i)
+    enddo
+
+  else
+    ! no fit is available in this provider for any other value of ng_fit_jast
+    print *, ' not implemented yet'
+    stop
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+double precision function fit_1_erf_x_2(x)
+ implicit none
+ double precision, intent(in) :: x
+ BEGIN_DOC
+! fit_1_erf_x_2(x) = \sum_{i=1,ng_fit_jast} c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))^2
+ END_DOC
+ integer :: i
+ fit_1_erf_x_2 = 0.d0
+ ! expo/coef_gauss_1_erf_x_2 are dimensioned (ng_fit_jast): looping up to n_max_fit_slat would read out of bounds
+ do i = 1, ng_fit_jast
+  fit_1_erf_x_2 += dexp(-expo_gauss_1_erf_x_2(i) *x*x) * coef_gauss_1_erf_x_2(i)
+ enddo
+end
+
+subroutine inv_r_times_poly(r, dist_r, dist_vec, poly)
+ implicit none
+ BEGIN_DOC
+! Components of r divided by its norm:
+!
+! poly(1) = x / sqrt(x^2+y^2+z^2), poly(2) = y / sqrt(x^2+y^2+z^2), poly(3) = z / sqrt(x^2+y^2+z^2)
+!
+! Arguments:
+!
+! r(1)  = x, r(2) = y, r(3) = z, dist_r = sqrt(x^2+y^2+z^2)
+!
+! dist_vec(1) = sqrt(y^2+z^2), dist_vec(2) = sqrt(x^2+z^2), dist_vec(3) = sqrt(x^2+y^2)
+!
+! When dist_r is not larger than 1.d-8, poly(i) is set to 1.d0 unless |r(i)| < dist_vec(i)
+ END_DOC
+ double precision, intent(in) :: r(3), dist_r, dist_vec(3)
+ double precision, intent(out):: poly(3)
+ double precision :: scal
+ integer :: ic
+ if (dist_r .gt. 1.d-8) then
+  scal = 1.d0/dist_r
+  poly(1:3) = r(1:3) * scal
+  return
+ endif
+ ! small dist_r: the division is only done for components dominated by the other two
+ do ic = 1, 3
+  if (abs(r(ic)) .lt. dist_vec(ic)) then
+   scal = 1.d0/dist_r
+   poly(ic) = r(ic) * scal
+  else
+   poly(ic) = 1.d0
+  endif
+ enddo
+end                      
diff --git a/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f b/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f
new file mode 100644
index 00000000..055bf323
--- /dev/null
+++ b/src/ao_tc_eff_map/providers_ao_eff_pot.irp.f
@@ -0,0 +1,86 @@
+
+BEGIN_PROVIDER [ logical, ao_tc_sym_two_e_pot_in_map ]
+  implicit none
+  use f77_zmq
+  use map_module
+  BEGIN_DOC
+  !  Set to .True. once the parallel job 'ao_tc_sym_two_e_pot' has run and ao_tc_sym_two_e_pot_map has been sorted.
+  !  Map of Atomic integrals  i(r1) j(r2) 1/r12 k(r1) l(r2)   (NOTE(review): operator as stated in the original note; confirm for this map)
+  END_DOC
+
+  integer                        :: i,j,k,l
+  double precision               :: ao_tc_sym_two_e_pot,cpu_1,cpu_2, wall_1, wall_2
+  double precision               :: integral, wall_0
+  include 'utils/constants.include.F'
+
+  ! For integrals file (the buffers and size_buffer below are not referenced in this provider)
+  integer(key_kind),allocatable  :: buffer_i(:)
+  integer,parameter              :: size_buffer = 1024*64
+  real(integral_kind),allocatable :: buffer_value(:)
+
+  integer                        :: n_integrals, rc
+  integer                        :: kk, m, j1, i1, lmax
+  character*(64)                 :: fmt
+
+  !double precision               :: j1b_gauss_coul_debug
+  !integral = j1b_gauss_coul_debug(1,1,1,1)
+  ! One integral is evaluated up front; NOTE(review): presumably to trigger what the integral routine needs before the parallel section - confirm.
+  integral = ao_tc_sym_two_e_pot(1,1,1,1)
+
+  double precision               :: map_mb
+
+  print*, 'Providing the ao_tc_sym_two_e_pot_map integrals'
+  call wall_time(wall_0)
+  call wall_time(wall_1)
+  call cpu_time(cpu_1)
+
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket, zmq_socket_pull
+  call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'ao_tc_sym_two_e_pot')
+  ! One task per AO index l: the task string lists the pairs (i,l), i = 1..l, each written as I5,X,I5,'|' (12 characters per pair).
+  character(len=:), allocatable :: task
+  allocate(character(len=ao_num*12) :: task)
+  write(fmt,*) '(', ao_num, '(I5,X,I5,''|''))'
+  do l=1,ao_num
+    write(task,fmt) (i,l, i=1,l)
+    integer, external :: add_task_to_taskserver
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task)) == -1) then
+      stop 'Unable to add task to server'
+    endif
+  enddo
+  deallocate(task)
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then   ! a failure here is only reported; execution continues
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+  ! nproc+1 threads are requested: thread 0 runs the collector, every other thread runs an in-process slave.
+  PROVIDE nproc
+  !$OMP PARALLEL DEFAULT(shared) private(i) num_threads(nproc+1)
+      i = omp_get_thread_num()
+      if (i==0) then
+        call ao_tc_sym_two_e_pot_in_map_collector(zmq_socket_pull)
+      else
+        call ao_tc_sym_two_e_pot_in_map_slave_inproc(i)
+      endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'ao_tc_sym_two_e_pot')
+
+  ! Post-processing: sort the map, then report its size and the elapsed cpu / wall times.
+  print*, 'Sorting the map'
+  call map_sort(ao_tc_sym_two_e_pot_map)
+  call cpu_time(cpu_2)
+  call wall_time(wall_2)
+  integer(map_size_kind)         :: get_ao_tc_sym_two_e_pot_map_size, ao_eff_pot_map_size
+  ao_eff_pot_map_size = get_ao_tc_sym_two_e_pot_map_size()
+
+  print*, 'AO eff_pot integrals provided:'
+  print*, ' Size of AO eff_pot map :         ', map_mb(ao_tc_sym_two_e_pot_map) ,'MB'
+  print*, ' Number of AO eff_pot integrals :', ao_eff_pot_map_size
+  print*, ' cpu  time :',cpu_2 - cpu_1, 's'
+  print*, ' wall time :',wall_2 - wall_1, 's  ( x ', (cpu_2-cpu_1)/(wall_2-wall_1+tiny(1.d0)), ' )'
+  ! tiny(1.d0) in the ratio above guards against a zero wall-time difference.
+  ao_tc_sym_two_e_pot_in_map = .True.
+
+
+END_PROVIDER
diff --git a/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f b/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f
new file mode 100644
index 00000000..c36ee9b4
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_1bgauss_j1.irp.f
@@ -0,0 +1,728 @@
+! ---
+
+double precision function j1b_gauss_2e_j1(i, j, k, l)
+
+  BEGIN_DOC
+  !
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                     + (r2-RA)^2 exp(-aA r2A^2)
+  !                                                     - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                     - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !  Delegates to j1b_gauss_2e_j1_schwartz when the product of the four primitive counts exceeds 1024.
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in) :: i, j, k, l                  ! AO indices
+
+  integer             :: p, q, r, s                  ! primitive indices of AOs i, j, k, l
+  integer             :: num_i, num_j, num_k, num_l  ! nucleus index of each AO (ao_nucl)
+  integer             :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer             :: iorder_p(3), iorder_q(3)
+  integer             :: dim1
+
+  double precision    :: coef1, coef2, coef3, coef4  ! running products of contraction coefficients
+  double precision    :: expo1, expo2, expo3, expo4
+  double precision    :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision    :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision    :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision    :: cx, cy, cz
+  double precision    :: j1b_gauss_2e_j1_schwartz
+
+  if( ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then
+    j1b_gauss_2e_j1 = j1b_gauss_2e_j1_schwartz(i, j, k, l)
+    return
+  endif
+
+  ! dim1 is passed as an input to the helpers below; it was previously left unset. Use the same value as j1b_gauss_2e_j1_schwartz.
+  dim1 = n_pt_max_integrals
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+  j1b_gauss_2e_j1 = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+    expo1 = ao_expo_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+      expo2 = ao_expo_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      do r = 1, ao_prim_num(k)
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j1 = j1b_gauss_2e_j1 + coef4 * ( cx + cy + cz )   ! coef4 = product of the four contraction coefficients
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+  return
+end function j1b_gauss_2e_j1
+
+! ---
+
+double precision function j1b_gauss_2e_j1_schwartz(i, j, k, l)
+
+  BEGIN_DOC
+  !  Screened evaluation: primitive contributions whose estimate product falls below the threshold are skipped.
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                     + (r2-RA)^2 exp(-aA r2A^2)
+  !                                                     - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                     - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in)           :: i, j, k, l   ! AO indices
+
+  integer                       :: p, q, r, s   ! primitive indices of AOs i, j, k, l
+  integer                       :: num_i, num_j, num_k, num_l, num_ii   ! num_ii is not referenced in this function
+  integer                       :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer                       :: iorder_p(3), iorder_q(3)
+  integer                       :: dim1
+
+  double precision              :: coef1, coef2, coef3, coef4
+  double precision              :: expo1, expo2, expo3, expo4
+  double precision              :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision              :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision              :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision              :: cx, cy, cz
+  double precision              :: schwartz_ij, thr
+  double precision, allocatable :: schwartz_kl(:,:)   ! (s,r): estimate for primitive pair; (0,r): max over s; (0,0): global max
+
+  PROVIDE j1b_pen
+
+  dim1 = n_pt_max_integrals
+  thr  = ao_integrals_threshold * ao_integrals_threshold   ! squared: it is compared with a product of two estimates
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  do p = 1, 3
+    I_power(p)  = ao_power(i,p)
+    J_power(p)  = ao_power(j,p)
+    K_power(p)  = ao_power(k,p)
+    L_power(p)  = ao_power(l,p)
+    I_center(p) = nucl_coord(num_i,p)
+    J_center(p) = nucl_coord(num_j,p)
+    K_center(p) = nucl_coord(num_k,p)
+    L_center(p) = nucl_coord(num_l,p)
+  enddo
+
+  ! Step 1: tabulate the estimates of the (k,l) primitive pairs, using the pair distribution on both sides of get_cxcycz_j1.
+  allocate( schwartz_kl(0:ao_prim_num(l) , 0:ao_prim_num(k)) )
+
+  schwartz_kl(0,0) = 0.d0
+  do r = 1, ao_prim_num(k)
+    expo3 = ao_expo_ordered_transp(r,k)
+    coef3 = ao_coef_normalized_ordered_transp(r,k) * ao_coef_normalized_ordered_transp(r,k)
+
+    schwartz_kl(0,r) = 0.d0
+    do s = 1, ao_prim_num(l)
+      expo4 = ao_expo_ordered_transp(s,l)
+      coef4 = coef3 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+
+      call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                          , K_power, L_power, K_center, L_center, dim1 )
+      q1_inv = 1.d0 / qq1
+
+      call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+      schwartz_kl(s,r) = coef4 * dabs( cx + cy + cz )
+      schwartz_kl(0,r) = max( schwartz_kl(0,r) , schwartz_kl(s,r) )
+    enddo
+
+    schwartz_kl(0,0) = max( schwartz_kl(0,r) , schwartz_kl(0,0) )
+  enddo
+
+  ! Step 2: accumulate the integral, skipping at three levels (all r, one r, one (s,r)) when the estimate product is below thr.
+  j1b_gauss_2e_j1_schwartz = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    expo1 = ao_expo_ordered_transp(p, i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      expo2 = ao_expo_ordered_transp(q, j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p )
+
+      schwartz_ij = coef2 * coef2 * dabs( cx + cy + cz )   ! estimate for the (p,q) pair, built like schwartz_kl(s,r)
+      if( schwartz_kl(0,0) * schwartz_ij < thr ) cycle
+
+      do r = 1, ao_prim_num(k)
+        if( schwartz_kl(0,r) * schwartz_ij < thr ) cycle
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          if( schwartz_kl(s,r) * schwartz_ij < thr ) cycle
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+ 
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j1_schwartz = j1b_gauss_2e_j1_schwartz + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  deallocate( schwartz_kl )
+
+  return
+end function j1b_gauss_2e_j1_schwartz
+
+! ---
+
+subroutine get_cxcycz_j1( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: dim1
+  integer,          intent(in)  :: iorder_p(3), iorder_q(3)
+  double precision, intent(in)  :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision, intent(in)  :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision, intent(out) :: cx, cy, cz
+
+  integer                       :: ii
+  integer                       :: shift_P(3), shift_Q(3)
+  double precision              :: expoii, factii, Centerii(3)
+  double precision              :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  double precision              :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  double precision              :: ff, gg
+
+  double precision              :: general_primitive_integral_erf_shifted
+  double precision              :: general_primitive_integral_coul_shifted
+
+  PROVIDE j1b_pen
+
+  cx = 0.d0
+  cy = 0.d0
+  cz = 0.d0
+  do ii = 1, nucl_num
+
+    expoii        = j1b_pen(ii)
+    Centerii(1:3) = nucl_coord(ii, 1:3)
+
+    call gaussian_product(pp1, P1_center, expoii, Centerii, factii, pp2, P2_center)
+    fact_p2 = fact_p1 * factii
+    p2_inv  = 1.d0 / pp2
+    call pol_modif_center( P1_center, P2_center, iorder_p, P1_new, P2_new )
+
+    call gaussian_product(qq1, Q1_center, expoii, Centerii, factii, qq2, Q2_center)
+    fact_q2 = fact_q1 * factii
+    q2_inv  = 1.d0 / qq2
+    call pol_modif_center( Q1_center, Q2_center, iorder_q, Q1_new, Q2_new )
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA)^2 exp(-aA r1A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_Q = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+
+    shift_P = (/ 2, 0, 0 /)
+    cx = cx + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 1, 0, 0 /)
+    cx = cx + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cx = cx + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+
+    shift_P = (/ 0, 2, 0 /)
+    cy = cy + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 1, 0 /)
+    cy = cy + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cy = cy + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+
+    shift_P = (/ 0, 0, 2 /)
+    cz = cz + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 0, 1 /)
+    cz = cz + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cz = cz + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r2-RA)^2 exp(-aA r2A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_P = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = Q2_center(1) - Centerii(1) 
+
+    shift_Q = (/ 2, 0, 0 /)
+    cx = cx + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = Q2_center(2) - Centerii(2) 
+
+    shift_Q = (/ 0, 2, 0 /)
+    cy = cy + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = Q2_center(3) - Centerii(3) 
+
+    shift_Q = (/ 0, 0, 2 /)
+    cz = cz + expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz - expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz + expoii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz + expoii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA) \cdot (r2-RA) exp(-aA r1A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+    gg = Q1_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+    gg = Q1_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+    gg = Q1_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+             , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A [ (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P1_center(1) - Centerii(1) 
+    gg = Q2_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P1_center(2) - Centerii(2) 
+    gg = Q2_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P1_center(3) - Centerii(3) 
+    gg = Q2_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * general_primitive_integral_coul_shifted( dim1      &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * general_primitive_integral_erf_shifted( dim1       &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * gg * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * ff * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * ff * general_primitive_integral_erf_shifted( dim1  &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+             , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+             , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+  enddo
+
+  return
+end subroutine get_cxcycz_j1
+
+! ---
+
diff --git a/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f b/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f
new file mode 100644
index 00000000..a61b5336
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_1bgauss_j2.irp.f
@@ -0,0 +1,729 @@
+! ---
+
+double precision function j1b_gauss_2e_j2(i, j, k, l)
+
+  BEGIN_DOC
+  !
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A c_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                         + (r2-RA)^2 exp(-aA r2A^2)
+  !                                                         - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                         - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  !  The result is the sum over all primitive quartets (p,q,r,s) of i, j, k, l of the
+  !  product of the four primitive coefficients with cx + cy + cz from get_cxcycz_j2.
+  !  Above 1024 primitive quartets, j1b_gauss_2e_j2_schwartz is called instead.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in) :: i, j, k, l
+
+  integer             :: p, q, r, s
+  integer             :: num_i, num_j, num_k, num_l
+  integer             :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer             :: iorder_p(3), iorder_q(3)
+  integer             :: dim1
+
+  double precision    :: coef1, coef2, coef3, coef4
+  double precision    :: expo1, expo2, expo3, expo4
+  double precision    :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision    :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision    :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision    :: cx, cy, cz
+  double precision    :: j1b_gauss_2e_j2_schwartz
+
+  dim1 = n_pt_max_integrals
+  ! long contractions are delegated to the variant that screens primitive quartets
+  if( ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024 ) then
+    j1b_gauss_2e_j2 = j1b_gauss_2e_j2_schwartz(i, j, k, l)
+    return
+  endif
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+  ! ao_power entries of each AO and coordinates of the nucleus ao_nucl(.) of each AO
+  I_power(1:3)  = ao_power(i,1:3)
+  J_power(1:3)  = ao_power(j,1:3)
+  K_power(1:3)  = ao_power(k,1:3)
+  L_power(1:3)  = ao_power(l,1:3)
+  I_center(1:3) = nucl_coord(num_i,1:3)
+  J_center(1:3) = nucl_coord(num_j,1:3)
+  K_center(1:3) = nucl_coord(num_k,1:3)
+  L_center(1:3) = nucl_coord(num_l,1:3)
+
+  j1b_gauss_2e_j2 = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+    expo1 = ao_expo_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+      expo2 = ao_expo_ordered_transp(q, j)
+      ! data of the primitive pair (p of i, q of j), both functions of r1
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+
+      do r = 1, ao_prim_num(k)
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+          ! data of the primitive pair (r of k, s of l), both functions of r2
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+          ! cx, cy, cz are accumulated over the nuclei inside get_cxcycz_j2
+          call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j2 = j1b_gauss_2e_j2 + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  return
+end function j1b_gauss_2e_j2
+
+! ---
+
+double precision function j1b_gauss_2e_j2_schwartz(i, j, k, l)
+
+  BEGIN_DOC
+  !
+  !  integral in the AO basis:
+  !     i(r1) j(r1) f(r12) k(r2) l(r2)
+  !
+  !  with:
+  !     f(r12) = - [ (0.5 - 0.5 erf(mu r12)) / r12 ] (r1-r2) \cdot \sum_A (-2 a_A c_A) [ r1A exp(-aA r1A^2) - r2A exp(-aA r2A^2) ]
+  !            = [ (1 - erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+  !                                                         + (r2-RA)^2 exp(-aA r2A^2)
+  !                                                         - (r1-RA) \cdot (r2-RA) exp(-aA r1A^2)
+  !                                                         - (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+  !
+  !  Primitive quartets are skipped when the product of the (i,j) pair estimate and the
+  !  (k,l) pair estimate (pair used for both electrons) is below ao_integrals_threshold^2.
+  !
+  END_DOC
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer, intent(in)           :: i, j, k, l
+
+  integer                       :: p, q, r, s
+  integer                       :: num_i, num_j, num_k, num_l
+  integer                       :: I_power(3), J_power(3), K_power(3), L_power(3)
+  integer                       :: iorder_p(3), iorder_q(3)
+  integer                       :: dim1
+
+  double precision              :: coef1, coef2, coef3, coef4
+  double precision              :: expo1, expo2, expo3, expo4
+  double precision              :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision              :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision              :: I_center(3), J_center(3), K_center(3), L_center(3)
+  double precision              :: cx, cy, cz
+  double precision              :: schwartz_ij, thr
+  double precision, allocatable :: schwartz_kl(:,:) 
+
+  dim1 = n_pt_max_integrals
+  thr  = ao_integrals_threshold * ao_integrals_threshold
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+  I_power(1:3)  = ao_power(i,1:3)
+  J_power(1:3)  = ao_power(j,1:3)
+  K_power(1:3)  = ao_power(k,1:3)
+  L_power(1:3)  = ao_power(l,1:3)
+  I_center(1:3) = nucl_coord(num_i,1:3)
+  J_center(1:3) = nucl_coord(num_j,1:3)
+  K_center(1:3) = nucl_coord(num_k,1:3)
+  L_center(1:3) = nucl_coord(num_l,1:3)
+  ! schwartz_kl(s,r): estimate of pair (r of k, s of l); (0,r): max over s; (0,0): max over all pairs
+  allocate( schwartz_kl(0:ao_prim_num(l) , 0:ao_prim_num(k)) )
+
+  schwartz_kl(0,0) = 0.d0
+  do r = 1, ao_prim_num(k)
+    expo3 = ao_expo_ordered_transp(r,k)
+    coef3 = ao_coef_normalized_ordered_transp(r,k) * ao_coef_normalized_ordered_transp(r,k)
+
+    schwartz_kl(0,r) = 0.d0
+    do s = 1, ao_prim_num(l)
+      expo4 = ao_expo_ordered_transp(s,l)
+      coef4 = coef3 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+
+      call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                          , K_power, L_power, K_center, L_center, dim1 )
+      q1_inv = 1.d0 / qq1
+      ! the (k,l) primitive pair is passed for both electrons
+      call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+      schwartz_kl(s,r) = coef4 * dabs( cx + cy + cz )
+      schwartz_kl(0,r) = max( schwartz_kl(0,r) , schwartz_kl(s,r) )
+    enddo
+
+    schwartz_kl(0,0) = max( schwartz_kl(0,r) , schwartz_kl(0,0) )
+  enddo
+
+  ! contraction over primitive quartets, with screening at the (p,q), r and s levels
+  j1b_gauss_2e_j2_schwartz = 0.d0
+
+  do p = 1, ao_prim_num(i)
+    expo1 = ao_expo_ordered_transp(p, i)
+    coef1 = ao_coef_normalized_ordered_transp(p, i)
+
+    do q = 1, ao_prim_num(j)
+      expo2 = ao_expo_ordered_transp(q, j)
+      coef2 = coef1 * ao_coef_normalized_ordered_transp(q, j)
+
+      call give_explicit_poly_and_gaussian( P1_new, P1_center, pp1, fact_p1, iorder_p, expo1, expo2 &
+                                          , I_power, J_power, I_center, J_center, dim1 )
+      p1_inv = 1.d0 / pp1
+      ! the (i,j) primitive pair is passed for both electrons
+      call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p )
+
+      schwartz_ij = coef2 * coef2 * dabs( cx + cy + cz )
+      if( schwartz_kl(0,0) * schwartz_ij < thr ) cycle
+
+      do r = 1, ao_prim_num(k)
+        if( schwartz_kl(0,r) * schwartz_ij < thr ) cycle
+        coef3 = coef2 * ao_coef_normalized_ordered_transp(r, k)
+        expo3 = ao_expo_ordered_transp(r, k)
+
+        do s = 1, ao_prim_num(l)
+          if( schwartz_kl(s,r) * schwartz_ij < thr ) cycle
+          coef4 = coef3 * ao_coef_normalized_ordered_transp(s, l)
+          expo4 = ao_expo_ordered_transp(s, l)
+
+          call give_explicit_poly_and_gaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q, expo3, expo4 &
+                                              , K_power, L_power, K_center, L_center, dim1 )
+          q1_inv = 1.d0 / qq1
+
+          call get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                            , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                            , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+          j1b_gauss_2e_j2_schwartz = j1b_gauss_2e_j2_schwartz + coef4 * ( cx + cy + cz )
+        enddo ! s
+      enddo  ! r
+    enddo   ! q
+  enddo    ! p
+
+  deallocate( schwartz_kl )
+
+  return
+end function j1b_gauss_2e_j2_schwartz
+
+! ---
+
+subroutine get_cxcycz_j2( dim1, cx, cy, cz                                  &
+                        , P1_center, P1_new, pp1, fact_p1, p1_inv, iorder_p &
+                        , Q1_center, Q1_new, qq1, fact_q1, q1_inv, iorder_q )
+
+  include 'utils/constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: dim1
+  integer,          intent(in)  :: iorder_p(3), iorder_q(3)
+  double precision, intent(in)  :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  double precision, intent(in)  :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  double precision, intent(out) :: cx, cy, cz
+
+  integer                       :: ii
+  integer                       :: shift_P(3), shift_Q(3)
+  double precision              :: coefii, expoii, factii, Centerii(3)
+  double precision              :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  double precision              :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  double precision              :: ff, gg
+
+  double precision              :: general_primitive_integral_erf_shifted
+  double precision              :: general_primitive_integral_coul_shifted
+
+  PROVIDE j1b_pen j1b_coeff
+
+  cx = 0.d0
+  cy = 0.d0
+  cz = 0.d0
+  do ii = 1, nucl_num
+
+    expoii        = j1b_pen  (ii)
+    coefii        = j1b_coeff(ii)
+    Centerii(1:3) = nucl_coord(ii, 1:3)
+
+    call gaussian_product(pp1, P1_center, expoii, Centerii, factii, pp2, P2_center)
+    fact_p2 = fact_p1 * factii
+    p2_inv  = 1.d0 / pp2
+    call pol_modif_center( P1_center, P2_center, iorder_p, P1_new, P2_new )
+
+    call gaussian_product(qq1, Q1_center, expoii, Centerii, factii, qq2, Q2_center)
+    fact_q2 = fact_q1 * factii
+    q2_inv  = 1.d0 / qq2
+    call pol_modif_center( Q1_center, Q2_center, iorder_q, Q1_new, Q2_new )
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA)^2 exp(-aA r1A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_Q = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+
+    shift_P = (/ 2, 0, 0 /)
+    cx = cx + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 1, 0, 0 /)
+    cx = cx + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+
+    shift_P = (/ 0, 0, 0 /)
+    cx = cx + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+
+    shift_P = (/ 0, 2, 0 /)
+    cy = cy + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 1, 0 /)
+    cy = cy + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cy = cy + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+
+    shift_P = (/ 0, 0, 2 /)
+    cz = cz + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_P = (/ 0, 0, 1 /)
+    cz = cz + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P        &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P       &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q       )
+
+    shift_P = (/ 0, 0, 0 /)
+    cz = cz + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !                     [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r2-RA)^2 exp(-aA r2A^2)
+    ! ----------------------------------------------------------------------------------------------------
+
+    shift_P = (/ 0, 0, 0 /)
+
+    ! x term:
+    ff = Q2_center(1) - Centerii(1) 
+
+    shift_Q = (/ 2, 0, 0 /)
+    cx = cx + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cx = cx - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = Q2_center(2) - Centerii(2) 
+
+    shift_Q = (/ 0, 2, 0 /)
+    cy = cy + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cy = cy - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = Q2_center(3) - Centerii(3) 
+
+    shift_Q = (/ 0, 0, 2 /)
+    cz = cz + expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz - expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz + expoii * coefii * 2.d0 * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P        &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q        )
+    cz = cz - expoii * coefii * 2.d0 * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P       &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q       )
+
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz + expoii * coefii * ff * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz - expoii * coefii * ff * ff * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA) \cdot (r2-RA) exp(-aA r1A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P2_center(1) - Centerii(1) 
+    gg = Q1_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P2_center(2) - Centerii(2) 
+    gg = Q1_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P2_center(3) - Centerii(3) 
+    gg = Q1_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P      &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p, shift_P     &
+                      , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+
+
+    ! ----------------------------------------------------------------------------------------------------
+    !              - [ (1-erf(mu r12)) / r12 ] \sum_A a_A c_A [ (r1-RA) \cdot (r2-RA) exp(-aA r2A^2) ]
+    ! ----------------------------------------------------------------------------------------------------
+
+    ! x term:
+    ff = P1_center(1) - Centerii(1) 
+    gg = Q2_center(1) - Centerii(1) 
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 1, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 1, 0, 0 /)
+    cx = cx - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cx = cx + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cx = cx - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cx = cx + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! y term:
+    ff = P1_center(2) - Centerii(2) 
+    gg = Q2_center(2) - Centerii(2) 
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 1, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 1, 0 /)
+    cy = cy - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cy = cy + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cy = cy - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cy = cy + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! z term:
+    ff = P1_center(3) - Centerii(3) 
+    gg = Q2_center(3) - Centerii(3) 
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * general_primitive_integral_coul_shifted( dim1      &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * general_primitive_integral_erf_shifted( dim1       &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 1 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * gg * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 1 /)
+    cz = cz - expoii * coefii * ff * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+    cz = cz + expoii * coefii * ff * general_primitive_integral_erf_shifted( dim1  &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q )
+
+    shift_p = (/ 0, 0, 0 /)
+    shift_Q = (/ 0, 0, 0 /)
+    cz = cz - expoii * coefii * ff * gg * general_primitive_integral_coul_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P      &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q      )
+    cz = cz + expoii * coefii * ff * gg * general_primitive_integral_erf_shifted( dim1 &
+                      , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p, shift_P     &
+                      , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q, shift_Q     )
+
+    ! ----------------------------------------------------------------------------------------------------
+
+  enddo
+
+  return
+end subroutine get_cxcycz_j2
+
+! ---
+
diff --git a/src/ao_tc_eff_map/two_e_ints_gauss.irp.f b/src/ao_tc_eff_map/two_e_ints_gauss.irp.f
new file mode 100644
index 00000000..51ef73a0
--- /dev/null
+++ b/src/ao_tc_eff_map/two_e_ints_gauss.irp.f
@@ -0,0 +1,327 @@
+double precision function ao_tc_sym_two_e_pot(i,j,k,l)
+  implicit none
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) (tc_pot(r12,mu)) k(r2) l(r2)
+  !
+  ! where (tc_pot(r12,mu)) is the scalar part of the potential EXCLUDING the term erf(mu r12)/r12.
+  !
+  ! See Eq. (32) of JCP 154, 084119 (2021).
+  END_DOC
+  integer,intent(in)             :: i,j,k,l
+  integer                        :: p,q,r,s
+  double precision               :: I_center(3),J_center(3),K_center(3),L_center(3)
+  integer                        :: num_i,num_j,num_k,num_l,dim1,I_power(3),J_power(3),K_power(3),L_power(3)
+  double precision               :: integral
+  include 'utils/constants.include.F'
+  double precision               :: P_new(0:max_dim,3),P_center(3),fact_p,pp
+  double precision               :: Q_new(0:max_dim,3),Q_center(3),fact_q,qq
+  integer                        :: iorder_p(3), iorder_q(3)
+  double precision, allocatable  :: schwartz_kl(:,:)
+  double precision               :: schwartz_ij
+  double precision :: scw_gauss_int,general_primitive_integral_gauss
+  ! Contraction over primitive quadruplets (p,q,r,s) of the AOs (i,j,k,l), screened with schwartz_kl / schwartz_ij.
+  dim1 = n_pt_max_integrals   ! forwarded unchanged to the primitive routines called below
+  ! nucleus index of each AO; used below to read the AO centres from nucl_coord
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+  ao_tc_sym_two_e_pot = 0.d0
+  double precision               :: thr
+  thr = ao_integrals_threshold*ao_integrals_threshold
+  ! thr is the squared AO threshold; below it is compared with products schwartz_kl * schwartz_ij
+  allocate(schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)))
+  ! schwartz_kl(s,r): estimate for primitive r of AO k and primitive s of AO l; index 0 stores maxima
+      double precision               :: coef3
+      double precision               :: coef2
+      double precision               :: p_inv,q_inv
+      double precision               :: coef1
+      double precision               :: coef4
+    ! Cartesian powers (ao_power) and centre coordinates (nucl_coord) of the four AOs
+    do p = 1, 3
+      I_power(p) = ao_power(i,p)
+      J_power(p) = ao_power(j,p)
+      K_power(p) = ao_power(k,p)
+      L_power(p) = ao_power(l,p)
+      I_center(p) = nucl_coord(num_i,p)
+      J_center(p) = nucl_coord(num_j,p)
+      K_center(p) = nucl_coord(num_k,p)
+      L_center(p) = nucl_coord(num_l,p)
+    enddo
+    ! (k,l) side: estimate for every primitive pair, from the primitive integral of the pair with itself
+    schwartz_kl(0,0) = 0.d0
+    do r = 1, ao_prim_num(k)
+      coef1 = ao_coef_normalized_ordered_transp(r,k)*ao_coef_normalized_ordered_transp(r,k)
+      schwartz_kl(0,r) = 0.d0
+      do s = 1, ao_prim_num(l)
+        coef2 = coef1 * ao_coef_normalized_ordered_transp(s,l) * ao_coef_normalized_ordered_transp(s,l)
+        call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q,&
+            ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),                 &
+            K_power,L_power,K_center,L_center,dim1)
+        q_inv = 1.d0/qq
+        scw_gauss_int = general_primitive_integral_gauss(dim1,              &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q,             &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+        ! keep |estimate|; slot (0,r) tracks the max over s, slot (0,0) the max over all (s,r)
+        schwartz_kl(s,r) = dabs(scw_gauss_int * coef2)
+        schwartz_kl(0,r) = max(schwartz_kl(0,r),schwartz_kl(s,r))
+      enddo
+      schwartz_kl(0,0) = max(schwartz_kl(0,r),schwartz_kl(0,0))
+    enddo
+    do p = 1, ao_prim_num(i)
+      coef1 = ao_coef_normalized_ordered_transp(p,i)
+      do q = 1, ao_prim_num(j)
+        coef2 = coef1*ao_coef_normalized_ordered_transp(q,j)
+        call give_explicit_poly_and_gaussian(P_new,P_center,pp,fact_p,iorder_p,&
+            ao_expo_ordered_transp(p,i),ao_expo_ordered_transp(q,j),                 &
+            I_power,J_power,I_center,J_center,dim1)
+        p_inv = 1.d0/pp
+        scw_gauss_int = general_primitive_integral_gauss(dim1,              &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p)
+        schwartz_ij = dabs(scw_gauss_int * coef2*coef2)
+        if (schwartz_kl(0,0)*schwartz_ij < thr) then   ! even the largest (r,s) estimate fails: skip this (p,q)
+           cycle
+        endif
+        do r = 1, ao_prim_num(k)
+          if (schwartz_kl(0,r)*schwartz_ij < thr) then   ! largest estimate over s fails for this r
+             cycle
+          endif
+          coef3 = coef2*ao_coef_normalized_ordered_transp(r,k)
+          do s = 1, ao_prim_num(l)
+            if (schwartz_kl(s,r)*schwartz_ij < thr) then   ! this (p,q,r,s) quadruplet is below thr
+               cycle
+            endif
+            coef4 = coef3*ao_coef_normalized_ordered_transp(s,l)
+            call give_explicit_poly_and_gaussian(Q_new,Q_center,qq,fact_q,iorder_q, &
+                ao_expo_ordered_transp(r,k),ao_expo_ordered_transp(s,l),            &
+                K_power,L_power,K_center,L_center,dim1)
+            q_inv = 1.d0/qq
+            integral = general_primitive_integral_gauss(dim1,              &
+                P_new,P_center,fact_p,pp,p_inv,iorder_p,             &
+                Q_new,Q_center,fact_q,qq,q_inv,iorder_q)
+            ao_tc_sym_two_e_pot = ao_tc_sym_two_e_pot + coef4 * integral
+          enddo ! s
+        enddo  ! r
+      enddo   ! q
+    enddo    ! p
+
+  deallocate (schwartz_kl)
+
+end
+
+
+double precision function general_primitive_integral_gauss(dim,      &
+      P_new,P_center,fact_p,p,p_inv,iorder_p,                        &
+      Q_new,Q_center,fact_q,q,q_inv,iorder_q)
+  implicit none
+  BEGIN_DOC
+  ! Computes the integral <pq|rs> where p,q,r,s are Gaussian primitives, summed over the n_gauss_eff_pot Gaussians (expo_gauss_eff_pot, coef_gauss_eff_pot)
+  END_DOC
+  integer,intent(in)             :: dim
+  include 'utils/constants.include.F'
+  double precision, intent(in)   :: P_new(0:max_dim,3),P_center(3),fact_p,p,p_inv
+  double precision, intent(in)   :: Q_new(0:max_dim,3),Q_center(3),fact_q,q,q_inv
+  integer, intent(in)            :: iorder_p(3)
+  integer, intent(in)            :: iorder_q(3)
+  ! note: dim is not referenced in the body of this function
+  double precision               :: r_cut,gama_r_cut,rho,dist
+  double precision               :: dx(0:max_dim),Ix_pol(0:max_dim),dy(0:max_dim),Iy_pol(0:max_dim),dz(0:max_dim),Iz_pol(0:max_dim)
+  integer                        :: n_Ix,n_Iy,n_Iz,nx,ny,nz
+  double precision               :: bla
+  integer                        :: ix,iy,iz,jx,jy,jz,i
+  double precision               :: a,b,c,d,e,f,accu,pq,const
+  double precision               :: pq_inv, p10_1, p10_2, p01_1, p01_2,pq_inv_2
+  integer                        :: n_pt_tmp,n_pt_out, iorder
+  double precision               :: d1(0:max_dim),d_poly(0:max_dim),rint,d1_screened(0:max_dim)
+  double precision :: thr
+
+  thr = ao_integrals_threshold
+  ! thr screens the polynomial coefficients (and their pairwise products) in the loops below
+  general_primitive_integral_gauss = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx,Ix_pol,dy,Iy_pol,dz,Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  ! Gaussian Product
+  ! ----------------
+  ! (the formulas in the comments below hold when p_inv = 1/p and q_inv = 1/q, as the visible callers pass them)
+  pq = p_inv*0.5d0*q_inv  ! 1/(2*p*q)
+  pq_inv = 0.5d0/(p+q)  ! 1/(2*(p+q))
+  p10_1 = q*pq  ! 1/(2p)
+  p01_1 = p*pq  ! 1/(2q)
+  pq_inv_2 = pq_inv+pq_inv  ! 1/(p+q)
+  p10_2 = pq_inv_2 * p10_1*q ! 0.5d0*q/(p*q + p*p)
+  p01_2 = pq_inv_2 * p01_1*p ! 0.5d0*p/(q*q + p*q)
+
+  ! x direction: each retained coefficient product d is combined with dx (from give_polynom_mult_center_x) into Ix_pol
+  accu = 0.d0
+  iorder = iorder_p(1)+iorder_q(1)+iorder_p(1)+iorder_q(1)
+  do ix=0,iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+    if (abs(P_new(ix,1)) < thr) cycle
+    a = P_new(ix,1)
+    do jx = 0, iorder_q(1)
+      d = a*Q_new(jx,1)
+      if (abs(d) < thr) cycle
+      !DIR$ FORCEINLINE
+      call give_polynom_mult_center_x(P_center(1),Q_center(1),ix,jx,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dx,nx)
+      !DIR$ FORCEINLINE
+      call add_poly_multiply(dx,nx,d,Ix_pol,n_Ix)
+    enddo
+  enddo
+  if (n_Ix == -1) then   ! n_Ix is only modified by add_poly_multiply; -1 makes the function return 0.d0
+    return
+  endif
+  iorder = iorder_p(2)+iorder_q(2)+iorder_p(2)+iorder_q(2)   ! y direction: same scheme as x
+  do ix=0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+    if (abs(P_new(iy,2)) > thr) then
+      b = P_new(iy,2)
+      do jy = 0, iorder_q(2)
+        e = b*Q_new(jy,2)
+        if (abs(e) < thr) cycle
+        !DIR$ FORCEINLINE
+        call   give_polynom_mult_center_x(P_center(2),Q_center(2),iy,jy,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dy,ny)
+        !DIR$ FORCEINLINE
+        call add_poly_multiply(dy,ny,e,Iy_pol,n_Iy)
+      enddo
+    endif
+  enddo
+  if (n_Iy == -1) then
+    return
+  endif
+  ! z direction: same scheme as x
+  iorder = iorder_p(3)+iorder_q(3)+iorder_p(3)+iorder_q(3)
+  do ix=0,iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+    if (abs(P_new(iz,3)) > thr) then
+      c = P_new(iz,3)
+      do jz = 0, iorder_q(3)
+        f = c*Q_new(jz,3)
+        if (abs(f) < thr) cycle
+        !DIR$ FORCEINLINE
+        call   give_polynom_mult_center_x(P_center(3),Q_center(3),iz,jz,p,q,iorder,pq_inv,pq_inv_2,p10_1,p01_1,p10_2,p01_2,dz,nz)
+        !DIR$ FORCEINLINE
+        call add_poly_multiply(dz,nz,f,Iz_pol,n_Iz)
+      enddo
+    endif
+  enddo
+  if (n_Iz == -1) then
+    return
+  endif
+  ! dist: squared distance between P_center and Q_center; rho and const are set here but not referenced again below
+  rho = p*q *pq_inv_2
+  dist =  (P_center(1) - Q_center(1))*(P_center(1) - Q_center(1)) +  &
+      (P_center(2) - Q_center(2))*(P_center(2) - Q_center(2)) +      &
+      (P_center(3) - Q_center(3))*(P_center(3) - Q_center(3))
+  const = dist*rho
+  ! multiply_poly is called with (Ix_pol, Iy_pol) -> d_poly, then with (d_poly, Iz_pol) -> d1
+  n_pt_tmp = n_Ix+n_Iy
+  do i=0,n_pt_tmp
+    d_poly(i)=0.d0
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_poly(Ix_pol,n_Ix,Iy_pol,n_Iy,d_poly,n_pt_tmp)
+  if (n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp+n_Iz
+  do i=0,n_pt_out
+    d1(i)=0.d0
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
+  ! each Gaussian i adds c_a * prefactor * (1 - t_a^2)^(3/2) * w_a * accu, with accu the polynomial value at t_a
+  double precision :: aa,c_a,t_a,rho_old,w_a,pi_3,prefactor,inv_pq_3_2
+  double precision :: gauss_int
+  integer :: m
+  gauss_int = 0.d0
+  pi_3 = pi*pi*pi
+  inv_pq_3_2 = (p_inv * q_inv)**(1.5d0)
+  rho_old = (p*q)/(p+q)
+  prefactor = pi_3 * inv_pq_3_2 * fact_p * fact_q 
+  do i = 1, n_gauss_eff_pot ! browse the gaussians with different expo/coef
+  !do i = 1, n_gauss_eff_pot-1
+   aa = expo_gauss_eff_pot(i) 
+   c_a = coef_gauss_eff_pot(i)
+   t_a = dsqrt( aa /(rho_old + aa) ) 
+   w_a = dexp(-t_a*t_a*rho_old*dist)
+   accu = 0.d0
+   ! evaluation of the polynomial Ix(t_a) * Iy(t_a) * Iz(t_a); only the even-degree coefficients of d1 are summed (step 2)
+   do m = 0, n_pt_out,2
+    accu += d1(m) * (t_a)**(dble(m)) 
+   enddo
+   ! Eq. (A8) of Toulouse, Colonna, Savin, Phys. Rev. A 70, 062505 (2004)
+   gauss_int = gauss_int + c_a * prefactor * (1.d0 - t_a*t_a)**(1.5d0) * w_a * accu
+  enddo
+
+  general_primitive_integral_gauss = gauss_int
+end
+
+subroutine compute_ao_integrals_gauss_jl(j,l,n_integrals,buffer_i,buffer_value)
+  implicit none
+  use map_module
+  BEGIN_DOC
+  !  Parallel client for AO integrals: for fixed (j,l), stores in buffer_i / buffer_value the keys and values of the ao_tc_sym_two_e_pot integrals that pass the threshold tests
+  END_DOC
+  ! n_integrals returns the number of entries written to buffer_i / buffer_value
+  integer, intent(in)            :: j,l
+  integer,intent(out)            :: n_integrals
+  integer(key_kind),intent(out)  :: buffer_i(ao_num*ao_num)
+  real(integral_kind),intent(out) :: buffer_value(ao_num*ao_num)
+
+  integer                        :: i,k
+  double precision               :: cpu_1,cpu_2, wall_1, wall_2
+  double precision               :: integral, wall_0
+  double precision               :: thr,ao_tc_sym_two_e_pot
+  integer                        :: kk, m, j1, i1
+  logical, external              :: ao_two_e_integral_zero
+
+  thr = ao_integrals_threshold
+
+  n_integrals = 0
+  ! j1 = j + l*(l-1)/2 (ishft(.,-1) halves); the loops stop once the same index built from (i,k) exceeds j1
+  j1 = j+ishft(l*l-l,-1)
+  do k = 1, ao_num           ! r1
+    i1 = ishft(k*k-k,-1)
+    if (i1 > j1) then
+      exit
+    endif
+    do i = 1, k
+      i1 += 1
+      if (i1 > j1) then
+        exit
+      endif
+!      if (ao_two_e_integral_zero(i,j,k,l)) then
+      if (.False.) then   ! never taken: the ao_two_e_integral_zero test above is commented out
+        cycle
+      endif
+      if (ao_two_e_integral_erf_schwartz(i,k)*ao_two_e_integral_erf_schwartz(j,l) < thr ) then
+        cycle
+      endif
+      !DIR$ FORCEINLINE
+      integral = ao_tc_sym_two_e_pot(i,k,j,l)  ! i,k : r1    j,l : r2
+      if (abs(integral) < thr) then
+        cycle
+      endif
+      n_integrals += 1
+      !DIR$ FORCEINLINE
+      call two_e_integrals_index(i,j,k,l,buffer_i(n_integrals))
+      buffer_value(n_integrals) = integral
+    enddo
+  enddo
+
+end
diff --git a/src/ao_tc_eff_map/useful_sub.irp.f b/src/ao_tc_eff_map/useful_sub.irp.f
new file mode 100644
index 00000000..4cfdcad2
--- /dev/null
+++ b/src/ao_tc_eff_map/useful_sub.irp.f
@@ -0,0 +1,364 @@
+! ---
+
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+double precision function general_primitive_integral_coul_shifted( dim                                                  &
+                                                                 , P_new, P_center, fact_p, p, p_inv, iorder_p, shift_P &
+                                                                 , Q_new, Q_center, fact_q, q, q_inv, iorder_q, shift_Q )
+  ! Primitive two-electron integral in which the monomial powers of the P and Q polynomials are raised by shift_P / shift_Q.
+  include 'utils/constants.include.F'
+  ! Result: fact_p * fact_q * pi_5_2 * p_inv * q_inv / dsqrt(p+q) times rint_sum of the product polynomial d1.
+  implicit none
+  ! note: dim is not referenced in the body of this function.
+  integer,          intent(in) :: dim
+  integer,          intent(in) :: iorder_p(3), shift_P(3)
+  integer,          intent(in) :: iorder_q(3), shift_Q(3)
+  double precision, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv
+  double precision, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv
+
+  integer                      :: n_Ix, n_Iy, n_Iz, nx, ny, nz
+  integer                      :: ix, iy, iz, jx, jy, jz, i
+  integer                      :: n_pt_tmp, n_pt_out, iorder
+  integer                      :: ii, jj
+  double precision             :: rho, dist
+  double precision             :: dx(0:max_dim), Ix_pol(0:max_dim)
+  double precision             :: dy(0:max_dim), Iy_pol(0:max_dim)
+  double precision             :: dz(0:max_dim), Iz_pol(0:max_dim)
+  double precision             :: a, b, c, d, e, f, accu, pq, const
+  double precision             :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2
+  double precision             :: d1(0:max_dim), d_poly(0:max_dim)
+  double precision             :: p_plus_q
+
+  double precision             :: rint_sum
+
+  general_primitive_integral_coul_shifted = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  ! Gaussian Product
+  ! ----------------
+  p_plus_q = (p+q) 
+  pq       = p_inv * 0.5d0 * q_inv
+  pq_inv   = 0.5d0 / p_plus_q
+  p10_1    = q * pq             ! 1/(2p)
+  p01_1    = p * pq             ! 1/(2q)
+  pq_inv_2 = pq_inv + pq_inv
+  p10_2    = pq_inv_2 * p10_1 * q ! 0.5d0 * q / (p*q + p*p)
+  p01_2    = pq_inv_2 * p01_1 * p ! 0.5d0 * p / (q*q + p*q)
+  ! x direction: the polynomial order bound includes each shift twice, like the iorder_p / iorder_q terms
+  accu = 0.d0
+
+  iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+    ! ii: power index actually sent to give_polynom_mult_center_x; the coefficient is still P_new(ix,1)
+    ii = ix + shift_P(1)
+    a  = P_new(ix,1)
+    if(abs(a) < thresh) cycle
+
+    do jx = 0, iorder_q(1)
+      ! jj: shifted power index on the Q side; the coefficient is still Q_new(jx,1)
+      jj = jx + shift_Q(1)
+      d  = a * Q_new(jx,1)
+      if(abs(d) < thresh) cycle
+
+      !DEC$ FORCEINLINE
+      call give_polynom_mult_center_x( P_center(1), Q_center(1), ii, jj &
+                                     , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx )
+      !DEC$ FORCEINLINE
+      call add_poly_multiply(dx, nx, d, Ix_pol, n_Ix)
+    enddo
+  enddo
+  if(n_Ix == -1) then   ! n_Ix is only modified by add_poly_multiply; -1 makes the function return 0.d0
+    return
+  endif
+  ! y direction: same scheme as x
+  iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+
+    if(abs(P_new(iy,2)) > thresh) then
+
+      ii = iy + shift_P(2)
+      b  = P_new(iy,2)
+
+      do jy = 0, iorder_q(2)
+
+        jj = jy + shift_Q(2)
+        e  = b * Q_new(jy,2)
+        if(abs(e) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(2), Q_center(2), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dy, ny, e, Iy_pol, n_Iy)
+      enddo
+    endif
+  enddo
+  if(n_Iy == -1) then
+    return
+  endif
+  ! z direction: same scheme as x
+  iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  do ix = 0, iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+
+    if( abs(P_new(iz,3)) > thresh ) then
+
+      ii = iz + shift_P(3)
+      c  = P_new(iz,3)
+
+      do jz = 0, iorder_q(3)
+
+        jj = jz + shift_Q(3)
+        f  = c * Q_new(jz,3)
+        if(abs(f) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(3), Q_center(3), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dz, nz, f, Iz_pol, n_Iz)
+      enddo
+    endif
+  enddo
+  if(n_Iz == -1) then
+    return
+  endif
+  ! dist: squared distance between P_center and Q_center; const = dist * rho is the value handed to rint_sum
+  rho = p * q * pq_inv_2
+  dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) &
+       + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) &
+       + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3))
+  const = dist*rho
+  ! multiply_poly is called with (Ix_pol, Iy_pol) -> d_poly, then with (d_poly, Iz_pol) -> d1
+  n_pt_tmp = n_Ix + n_Iy
+  do i = 0, n_pt_tmp
+    d_poly(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp)
+  if(n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp + n_Iz
+  do i = 0, n_pt_out
+    d1(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out)
+  accu = accu + rint_sum(n_pt_out, const, d1)
+
+  general_primitive_integral_coul_shifted = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / dsqrt(p_plus_q)
+
+  return
+end function general_primitive_integral_coul_shifted
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+
+
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+double precision function general_primitive_integral_erf_shifted( dim                                                  &
+                                                                , P_new, P_center, fact_p, p, p_inv, iorder_p, shift_P &
+                                                                , Q_new, Q_center, fact_q, q, q_inv, iorder_q, shift_Q )
+  ! Value: fact_p*fact_q*pi_5_2*p_inv*q_inv/dsqrt(p_plus_q) * rint_sum(n_pt_out, const, d1), where d1 combines
+  include 'utils/constants.include.F'
+  ! the x, y and z polynomials assembled below. The result stays 0.d0 on every early return.
+  implicit none
+  ! shift_P / shift_Q are added to the monomial orders before calling give_polynom_mult_center_x.
+  integer,          intent(in) :: dim
+  integer,          intent(in) :: iorder_p(3), shift_P(3)
+  integer,          intent(in) :: iorder_q(3), shift_Q(3)
+  double precision, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv
+  double precision, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv
+  ! NOTE(review): dim is never referenced in this body.
+  integer                      :: n_Ix, n_Iy, n_Iz, nx, ny, nz
+  integer                      :: ix, iy, iz, jx, jy, jz, i
+  integer                      :: n_pt_tmp, n_pt_out, iorder
+  integer                      :: ii, jj
+  double precision             :: rho, dist
+  double precision             :: dx(0:max_dim), Ix_pol(0:max_dim)
+  double precision             :: dy(0:max_dim), Iy_pol(0:max_dim)
+  double precision             :: dz(0:max_dim), Iz_pol(0:max_dim)
+  double precision             :: a, b, c, d, e, f, accu, pq, const
+  double precision             :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2
+  double precision             :: d1(0:max_dim), d_poly(0:max_dim)
+  double precision             :: p_plus_q
+
+  double precision             :: rint_sum
+  ! mu_erf, thresh, max_dim and pi_5_2 are not declared locally (presumably provider / include constants - TODO confirm).
+  general_primitive_integral_erf_shifted = 0.d0
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+  ! p_plus_q below simplifies algebraically to (p+q) + p*q/mu_erf**2 (mu_erf must be non-zero).
+  ! Gaussian Product
+  ! ----------------
+  p_plus_q = (p+q) * ( (p*q)/(p+q) + mu_erf*mu_erf ) / (mu_erf*mu_erf)
+  pq       = p_inv * 0.5d0 * q_inv
+  pq_inv   = 0.5d0 / p_plus_q
+  p10_1    = q * pq             ! 1/(2p)
+  p01_1    = p * pq             ! 1/(2q)
+  pq_inv_2 = pq_inv + pq_inv
+  p10_2    = pq_inv_2 * p10_1 * q ! q / (2 p p_plus_q), taking p_inv = 1/p
+  p01_2    = pq_inv_2 * p01_1 * p ! p / (2 q p_plus_q), taking q_inv = 1/q
+
+  accu = 0.d0
+  ! x axis: Ix_pol is cleared up to order 2*(iorder_p(1) + iorder_q(1) + shift_P(1) + shift_Q(1)).
+  iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  iorder = iorder + shift_P(1) + shift_Q(1)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Ix_pol(ix) = 0.d0
+  enddo
+  n_Ix = 0
+  do ix = 0, iorder_p(1)
+
+    ii = ix + shift_P(1)
+    a  = P_new(ix,1)
+    if(abs(a) < thresh) cycle
+
+    do jx = 0, iorder_q(1)
+
+      jj = jx + shift_Q(1)
+      d  = a * Q_new(jx,1)
+      if(abs(d) < thresh) cycle
+
+      !DEC$ FORCEINLINE
+      call give_polynom_mult_center_x( P_center(1), Q_center(1), ii, jj &
+                                     , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx )
+      !DEC$ FORCEINLINE
+      call add_poly_multiply(dx, nx, d, Ix_pol, n_Ix)
+    enddo
+  enddo
+  if(n_Ix == -1) then
+    return
+  endif
+  ! y axis: same construction as for x, using component 2 of every input.
+  iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  iorder = iorder + shift_P(2) + shift_Q(2)
+  !DIR$ VECTOR ALIGNED
+  do ix = 0, iorder
+    Iy_pol(ix) = 0.d0
+  enddo
+  n_Iy = 0
+  do iy = 0, iorder_p(2)
+
+    if(abs(P_new(iy,2)) > thresh) then
+
+      ii = iy + shift_P(2)
+      b  = P_new(iy,2)
+
+      do jy = 0, iorder_q(2)
+
+        jj = jy + shift_Q(2)
+        e  = b * Q_new(jy,2)
+        if(abs(e) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(2), Q_center(2), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dy, ny, e, Iy_pol, n_Iy)
+      enddo
+    endif
+  enddo
+  if(n_Iy == -1) then
+    return
+  endif
+  ! z axis: same construction as for x, using component 3 of every input.
+  iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  iorder = iorder + shift_P(3) + shift_Q(3)
+  do ix = 0, iorder
+    Iz_pol(ix) = 0.d0
+  enddo
+  n_Iz = 0
+  do iz = 0, iorder_p(3)
+
+    if( abs(P_new(iz,3)) > thresh ) then
+
+      ii = iz + shift_P(3)
+      c  = P_new(iz,3)
+
+      do jz = 0, iorder_q(3)
+
+        jj = jz + shift_Q(3)
+        f  = c * Q_new(jz,3)
+        if(abs(f) < thresh) cycle
+
+        !DEC$ FORCEINLINE
+        call give_polynom_mult_center_x( P_center(3), Q_center(3), ii, jj &
+                                       , p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz )
+        !DEC$ FORCEINLINE
+        call add_poly_multiply(dz, nz, f, Iz_pol, n_Iz)
+      enddo
+    endif
+  enddo
+  if(n_Iz == -1) then
+    return
+  endif
+  ! rho = p*q/p_plus_q ; const = rho * |P_center - Q_center|**2 is the scalar handed to rint_sum.
+  rho = p * q * pq_inv_2
+  dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) &
+       + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) &
+       + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3))
+  const = dist*rho
+  ! d_poly (cleared first) receives the combination of Ix_pol and Iy_pol computed by multiply_poly.
+  n_pt_tmp = n_Ix + n_Iy
+  do i = 0, n_pt_tmp
+    d_poly(i) = 0.d0
+  enddo
+
+  !DEC$ FORCEINLINE
+  call multiply_poly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp)
+  if(n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp + n_Iz
+  do i = 0, n_pt_out
+    d1(i) = 0.d0
+  enddo
+  ! d1 (cleared first) receives the combination of d_poly and Iz_pol.
+  !DEC$ FORCEINLINE
+  call multiply_poly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out)
+  accu = accu + rint_sum(n_pt_out, const, d1)
+
+  general_primitive_integral_erf_shifted = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / dsqrt(p_plus_q)
+
+  return
+end function general_primitive_integral_erf_shifted
+!______________________________________________________________________________________________________________________
+!______________________________________________________________________________________________________________________
+
+
+
+
+
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 38478d21..b8beea76 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -169,4 +169,43 @@
  enddo
  END_PROVIDER
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j) = value of the i-th AO at the j-th point of the extra final grid
+ ! (point coordinates are read from final_grid_points_extra)
+ END_DOC
+ integer          :: i
+ double precision :: aos_array(ao_num), r(3)
+
+ ! one grid point per iteration; r and aos_array are per-thread scratch
+ !$OMP PARALLEL DO                &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i, r, aos_array)  &
+ !$OMP SHARED (aos_in_r_array_extra, n_points_extra_final_grid, ao_num, final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1:3) = final_grid_points_extra(1:3,i)
+  call give_all_aos_at_r(r, aos_array)
+  ! store the whole AO column of grid point i at once
+  aos_in_r_array_extra(1:ao_num,i) = aos_array(1:ao_num)
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ ! (element-wise transpose of aos_in_r_array_extra)
+ END_DOC
+ integer :: i,j
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i)
+  enddo
+ enddo
+
+ END_PROVIDER
 
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..39ea0cdf
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,155 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r)|, accumulated over the final grid with final_weight_at_r_vector
+ END_DOC
+ integer :: i,ipoint
+ double precision :: contrib, weight
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid
+  ! weight attached to this grid point (the point coordinates themselves are not needed here)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib
+  enddo
+ enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)|, accumulated over the final grid with its weights
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid
+  ! weight attached to this grid point (the point coordinates themselves are not needed here)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_overlap_abs_grid(j,i) += contrib
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = centroid of |phi_i(r) phi_j(r)|, i.e.
+! \int dr |phi_i(r) phi_j(r)| (x,y,z) / \int dr |phi_i(r) phi_j(r)|   (sums over the final grid)
+!
+! Pairs whose ao_overlap_abs_grid(j,i) does not exceed 1.d-10 in absolute value
+! receive the sentinel value 10000.d0 in all three components.
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+
+ ! Step 1 : accumulate the un-normalised first moments on the grid
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  r(1:3) = final_grid_points(1:3,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_prod_center(1:3,j,i) += contrib * r(1:3)
+   enddo
+  enddo
+ enddo
+
+ ! Step 2 : normalise by the absolute overlap, or flag the negligible pairs
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(.not.(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10))then
+    ao_prod_center(1:3,j,i) = 10000.d0
+   else
+    ao_prod_center(1:3,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+   endif
+  enddo
+ enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_abs_r(j,i) = \int dr |phi_i(r) phi_j(r)| dsqrt((x - X_ji)^2 + (y - Y_ji)^2 + (z - Z_ji)^2)
+! with (X_ji, Y_ji, Z_ji) = ao_prod_center(1:3,j,i).
+! The sum is NOT divided by \int |phi_i(r) phi_j(r)| : ao_prod_sigma uses ao_overlap_abs_grid/ao_prod_abs_r.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ ao_prod_abs_r = 0.d0
+ do ipoint = 1,n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    contrib_x2 = 0.d0
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i))
+    enddo
+    contrib_x2 = dsqrt(contrib_x2)
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_sigma(j,i) : exponent of the Gaussian used to model |chi_i(r) chi_j(r)|, i.e.
+!
+! |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+!
+! NOTE(review): ao_prod_abs_r(j,i) = 0 is not guarded (a threshold test on ao_overlap_abs_grid was left disabled).
+ END_DOC
+ integer :: i,j
+ double precision :: pi
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+   ! sigma = (1/pi) * (2 * ao_overlap_abs_grid / ao_prod_abs_r)**2
+   ao_prod_sigma(j,i) = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+  enddo
+ enddo
+ END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = Euclidean distance between ao_prod_center(1:3,j,i),
+ ! the center of |phi_i(r) phi_j(r)|, and the grid point final_grid_points(1:3,ipoint)
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(1:3) = final_grid_points(1:3,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    ! squared distance, summed in x, y, z order
+    distance = (ao_prod_center(1,j,i) - r(1))*(ao_prod_center(1,j,i) - r(1)) &
+             + (ao_prod_center(2,j,i) - r(2))*(ao_prod_center(2,j,i) - r(2)) &
+             + (ao_prod_center(3,j,i) - r(3))*(ao_prod_center(3,j,i) - r(3))
+    ao_prod_dist_grid(j,i,ipoint)  = dsqrt(distance)
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/non_h_ints_mu/NEED b/src/non_h_ints_mu/NEED
new file mode 100644
index 00000000..d09ab4a5
--- /dev/null
+++ b/src/non_h_ints_mu/NEED
@@ -0,0 +1,2 @@
+ao_tc_eff_map
+bi_ortho_mos
diff --git a/src/non_h_ints_mu/README.rst b/src/non_h_ints_mu/README.rst
new file mode 100644
index 00000000..6a36bb98
--- /dev/null
+++ b/src/non_h_ints_mu/README.rst
@@ -0,0 +1,11 @@
+=============
+non_h_ints_mu
+=============
+
+Computes the non-Hermitian potential of the mu-TC Hamiltonian on the AO and bi-orthogonal MO bases.
+The operator is defined in Eq. 33 of JCP 154, 084119 (2021)
+
+The two providers are :
++) ao_non_hermit_term_chemist which returns the non-Hermitian part of the two-electron TC Hamiltonian on the AO basis.
++) mo_non_hermit_term_chemist which returns the non-Hermitian part of the two-electron TC Hamiltonian on the BI-ORTHO MO basis.
+
diff --git a/src/non_h_ints_mu/debug_fit.irp.f b/src/non_h_ints_mu/debug_fit.irp.f
new file mode 100644
index 00000000..af441335
--- /dev/null
+++ b/src/non_h_ints_mu/debug_fit.irp.f
@@ -0,0 +1,512 @@
+
+! --
+
+program debug_fit
+  ! Debug driver: overrides the grid variables below, then runs the test_* routines left un-commented.
+  implicit none
+  ! The three my_* variables are assigned here and then flagged as modified with "touch".
+  my_grid_becke  = .True.
+  ! active grid-size pair; alternative pairs are kept commented out underneath
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  !my_n_pt_r_grid = 100
+  !my_n_pt_a_grid = 170
+  !my_n_pt_r_grid = 150
+  !my_n_pt_a_grid = 194
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf j1b_pen
+
+  !call test_j1b_nucl()
+  call test_grad_j1b_nucl()
+  !call test_lapl_j1b_nucl()
+
+  !call test_list_b2()
+  !call test_list_b3()
+
+  call test_fit_u()
+  !call test_fit_u2()
+  !call test_fit_ugradu()
+
+end
+
+! ---
+
+subroutine test_j1b_nucl()
+
+  ! Grid check of the provider v_1b against the external function j1b_nucl:
+  ! prints every point where they differ by more than eps_ij, then the two running sums.
+
+  implicit none
+  double precision, external :: j1b_nucl
+  integer                    :: ipoint
+  double precision           :: r(3)
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+
+  print*, ' test_j1b_nucl ...'
+
+  PROVIDE v_1b
+
+  normalz = 0.d0
+  acc_tot = 0.d0
+  eps_ij  = 1d-7
+
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+    i_exc  = v_1b(ipoint)
+    i_num  = j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+
+    ! running sums first: the report below does not modify them
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in v_1b on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+end subroutine test_j1b_nucl
+
+! ---
+
+subroutine test_grad_j1b_nucl()
+  ! Checks v_1b_grad(1:3,ipoint) against grad_x/y/z_j1b_nucl(r) on every point of the final grid.
+  implicit none
+  integer                    :: ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision           :: r(3)
+  double precision, external :: grad_x_j1b_nucl, grad_y_j1b_nucl, grad_z_j1b_nucl
+
+  print*, ' test_grad_j1b_nucl ...'
+
+  PROVIDE v_1b_grad
+
+  eps_ij  = 1d-7
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+
+    r(1:3) = final_grid_points(1:3,ipoint)
+
+    ! each component adds its own error to acc_tot and its own reference magnitude to normalz
+    i_exc  = v_1b_grad(1,ipoint)
+    i_num  = grad_x_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in x of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    i_exc  = v_1b_grad(2,ipoint)
+    i_num  = grad_y_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in y of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    i_exc  = v_1b_grad(3,ipoint)
+    i_num  = grad_z_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in z of v_1b_grad on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_grad_j1b_nucl
+
+! ---
+
+subroutine test_lapl_j1b_nucl()
+
+  ! Grid check of the provider v_1b_lapl against the external function lapl_j1b_nucl:
+  ! prints every point where they differ by more than eps_ij, then the two running sums.
+
+  implicit none
+  double precision, external :: lapl_j1b_nucl
+  integer                    :: ipoint
+  double precision           :: r(3)
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+
+  print*, ' test_lapl_j1b_nucl ...'
+
+  PROVIDE v_1b_lapl
+
+  normalz = 0.d0
+  acc_tot = 0.d0
+  eps_ij  = 1d-5
+
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+    i_exc  = v_1b_lapl(ipoint)
+    i_num  = lapl_j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+
+    ! running sums first: the report below does not modify them
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in v_1b_lapl on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+end subroutine test_lapl_j1b_nucl
+
+! ---
+
+subroutine test_list_b2()
+
+  ! Grid check of the provider v_1b_list_b2 against the external function j1b_nucl:
+  ! prints every point where they differ by more than eps_ij, then the two running sums.
+
+  implicit none
+  double precision, external :: j1b_nucl
+  integer                    :: ipoint
+  double precision           :: r(3)
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+
+  print*, ' test_list_b2 ...'
+
+  PROVIDE v_1b_list_b2
+
+  normalz = 0.d0
+  acc_tot = 0.d0
+  eps_ij  = 1d-7
+
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+    i_exc  = v_1b_list_b2(ipoint)
+    i_num  = j1b_nucl(r)
+    acc_ij = dabs(i_exc - i_num)
+
+    ! running sums first: the report below does not modify them
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in list_b2 on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+end subroutine test_list_b2
+
+! ---
+
+subroutine test_list_b3()
+
+  ! Grid check of the provider v_1b_list_b3 against the SQUARE of the external function j1b_nucl:
+  ! prints every point where they differ by more than eps_ij, then the two running sums.
+
+  implicit none
+  double precision, external :: j1b_nucl
+  integer                    :: ipoint
+  double precision           :: r(3)
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_tmp, i_num, normalz
+
+  print*, ' test_list_b3 ...'
+
+  PROVIDE v_1b_list_b3
+
+  normalz = 0.d0
+  acc_tot = 0.d0
+  eps_ij  = 1d-7
+
+  do ipoint = 1, n_points_final_grid
+    r(1:3) = final_grid_points(1:3,ipoint)
+    i_exc  = v_1b_list_b3(ipoint)
+    i_tmp  = j1b_nucl(r)
+    i_num  = i_tmp * i_tmp
+    acc_ij = dabs(i_exc - i_num)
+
+    ! running sums first: the report below does not modify them
+    acc_tot += acc_ij
+    normalz += dabs(i_num)
+
+    if(acc_ij .gt. eps_ij) then
+      print *, ' problem in list_b3 on', ipoint
+      print *, ' analyt = ', i_exc
+      print *, ' numeri = ', i_num
+      print *, ' diff   = ', acc_ij
+    endif
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+end subroutine test_list_b3
+
+! ---
+
+subroutine test_fit_ugradu()
+  ! For every pair of final-grid points (r1, r2) with |r1-r2|**2 >= 1d-10, compares
+  implicit none
+  ! j12_mu(r1,r2) * grad(1:3) (grad from grad1_j12_mu_exc) with the Gaussian fit
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2, tmp, dx, dy, dz
+  double precision           :: r1(3), r2(3), grad(3)
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+  ! sum_i coef_gauss_j_mu_1_erf(i) * exp(-expo_gauss_j_mu_1_erf(i)*x2) / sqrt(x2) times (dx, dy, dz).
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_ugradu ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_1_erf(i)
+        coef   = coef_gauss_j_mu_1_erf(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+      i_fit = i_fit / dsqrt(x2)
+
+      tmp = j12_mu(r1, r2)
+      call grad1_j12_mu_exc(r1, r2, grad)
+
+      ! ---
+
+      i_exc = tmp * grad(1)
+      i_num = i_fit * dx
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on x in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+
+      ! ---
+
+      i_exc = tmp * grad(2)
+      i_num = i_fit * dy
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on y in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+
+      ! ---
+
+      i_exc = tmp * grad(3)
+      i_num = i_fit * dz
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem on z in test_fit_ugradu on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+
+      ! ---
+
+    enddo
+    ! relative-error test written as a product so that normalz = 0 never causes a division by zero
+    if( acc_tot .gt. 1d-3 * normalz ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_ugradu
+
+! ---
+
+subroutine test_fit_u()
+  ! For every pair of final-grid points (r1, r2) with |r1-r2|**2 >= 1d-10, compares j12_mu(r1,r2)
+  implicit none
+  ! with the Gaussian fit sum_i coef_gauss_j_mu_x(i) * exp(-expo_gauss_j_mu_x(i)*x2).
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2
+  double precision           :: r1(3), r2(3), dx, dy, dz
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_u ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_x(i)
+        coef   = coef_gauss_j_mu_x(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+
+      i_exc = j12_mu(r1, r2)
+      i_num = i_fit
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem in test_fit_u on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+    enddo
+    ! relative-error test written as a product so that normalz = 0 never causes a division by zero
+    if( acc_tot .gt. 1d-3 * normalz ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_u
+
+! ---
+
+subroutine test_fit_u2()
+  ! For every pair of final-grid points (r1, r2) with |r1-r2|**2 >= 1d-10, compares j12_mu(r1,r2)**2
+  implicit none
+  ! with the Gaussian fit sum_i coef_gauss_j_mu_x_2(i) * exp(-expo_gauss_j_mu_x_2(i)*x2).
+  integer                    :: jpoint, ipoint, i
+  double precision           :: i_exc, i_fit, i_num, x2
+  double precision           :: r1(3), r2(3), dx, dy, dz, tmp
+  double precision           :: eps_ij, acc_tot, acc_ij, normalz, coef, expo
+
+  double precision, external :: j12_mu
+
+  print*, ' test_fit_u2 ...'
+
+  eps_ij = 1d-3
+
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    acc_tot = 0.d0
+    normalz = 0.d0
+    do ipoint = 1, n_points_final_grid
+
+      r1(1) = final_grid_points(1,ipoint)
+      r1(2) = final_grid_points(2,ipoint)
+      r1(3) = final_grid_points(3,ipoint)
+
+      dx = r1(1) - r2(1)
+      dy = r1(2) - r2(2)
+      dz = r1(3) - r2(3)
+      x2 = dx * dx + dy * dy + dz * dz
+      if(x2 .lt. 1d-10) cycle
+
+      i_fit = 0.d0
+      do i = 1, n_max_fit_slat
+        expo   = expo_gauss_j_mu_x_2(i)
+        coef   = coef_gauss_j_mu_x_2(i)
+        i_fit += coef * dexp(-expo*x2)
+      enddo
+
+      tmp   = j12_mu(r1, r2)
+      i_exc = tmp * tmp
+      i_num = i_fit
+      acc_ij = dabs(i_exc - i_num)
+      if(acc_ij .gt. eps_ij) then
+        print *, ' problem in test_fit_u2 on', ipoint
+        print *, ' analyt = ', i_exc
+        print *, ' numeri = ', i_num
+        print *, ' diff   = ', acc_ij
+      endif
+
+      acc_tot += acc_ij
+      normalz += dabs(i_exc)
+    enddo
+    ! relative-error test written as a product so that normalz = 0 never causes a division by zero
+    if( acc_tot .gt. 1d-3 * normalz ) then
+      print*, ' acc_tot = ', acc_tot
+      print*, ' normalz = ', normalz
+    endif
+  enddo
+
+  return
+end subroutine test_fit_u2
+
+! ---
+
+
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
new file mode 100644
index 00000000..5e7ef7e9
--- /dev/null
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -0,0 +1,780 @@
+
+! --
+
+program debug_integ_jmu_modif
+  ! Debug driver: overrides the grid variables below, then runs the single test call left un-commented.
+  implicit none
+  ! The three my_* variables are assigned here and then flagged as modified with "touch".
+  my_grid_becke  = .True.
+
+  !my_n_pt_r_grid = 30
+  !my_n_pt_a_grid = 50
+  !my_n_pt_r_grid = 100
+  !my_n_pt_a_grid = 170
+  my_n_pt_r_grid = 150
+  my_n_pt_a_grid = 194
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf j1b_pen
+  ! every test below is currently disabled except the last call
+!  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_erf_rk_cst_mu_j1b()
+!  call test_x_v_ij_erf_rk_cst_mu_j1b()
+!  call test_int2_u2_j1b2()
+!  call test_int2_grad1u2_grad2u2_j1b2()
+!  call test_int2_u_grad1u_total_j1b2()
+!
+!  call test_int2_grad1_u12_ao()
+!
+!  call test_grad12_j12()
+!  call test_u12sq_j1bsq()
+!  call test_u12_grad1_u12_j1b_grad1_j1b()
+!  !call test_gradu_squared_u_ij_mu()
+
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
+end
+
+! ---
+
+subroutine test_v_ij_u_cst_mu_j1b()
+  ! Compares v_ij_u_cst_mu_j1b(i,j,ipoint) with num_v_ij_u_cst_mu_j1b(i,j,ipoint) for all AO pairs and grid points.
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_v_ij_u_cst_mu_j1b
+
+  print*, ' test_v_ij_u_cst_mu_j1b ...'
+
+  PROVIDE v_ij_u_cst_mu_j1b
+  ! eps_ij: per-element reporting threshold; acc_tot / normalz: summed |difference| and summed |i_num|
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     v_ij_u_cst_mu_j1b(i,j,ipoint) 
+        i_num  = num_v_ij_u_cst_mu_j1b(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in v_ij_u_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_v_ij_u_cst_mu_j1b
+
+! ---
+
+subroutine test_v_ij_erf_rk_cst_mu_j1b()
+  ! Compares v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) with num_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) for all AO pairs and grid points.
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_v_ij_erf_rk_cst_mu_j1b
+
+  print*, ' test_v_ij_erf_rk_cst_mu_j1b ...'
+
+  PROVIDE v_ij_erf_rk_cst_mu_j1b
+  ! eps_ij: per-element reporting threshold; acc_tot / normalz: summed |difference| and summed |i_num|
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) 
+        i_num  = num_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine test_x_v_ij_erf_rk_cst_mu_j1b()
+  ! Compares x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1:3) with integ(1:3) for all AO pairs and grid points.
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision :: integ(3)
+
+  print*, ' test_x_v_ij_erf_rk_cst_mu_j1b ...'
+
+  PROVIDE x_v_ij_erf_rk_cst_mu_j1b
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! integ is presumably filled by the call below (it is read right after); each component feeds acc_tot / normalz
+        call num_x_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint, integ)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) 
+        i_num  = integ(1)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in x part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) 
+        i_num  = integ(2)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in y part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) 
+        i_num  = integ(3)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in z part of x_v_ij_erf_rk_cst_mu_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_x_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine test_int2_u2_j1b2()
+  ! Compares int2_u2_j1b2(i,j,ipoint) with num_int2_u2_j1b2(i,j,ipoint) for all AO pairs and grid points.
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_int2_u2_j1b2
+
+  print*, ' test_int2_u2_j1b2 ...'
+
+  PROVIDE int2_u2_j1b2
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     int2_u2_j1b2(i,j,ipoint) 
+        i_num  = num_int2_u2_j1b2(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in int2_u2_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+  ! NOTE(review): acc_tot is printed relative to normalz here (no guard for normalz = 0); sibling tests print the raw sum.
+  acc_tot = acc_tot / normalz
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_u2_j1b2
+
+! ---
+
+subroutine test_int2_grad1u2_grad2u2_j1b2()
+  ! Compares int2_grad1u2_grad2u2_j1b2(i,j,ipoint) with num_int2_grad1u2_grad2u2_j1b2(i,j,ipoint) for all AO pairs and grid points.
+  implicit none
+  integer                    :: i, j, ipoint
+  double precision           :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision, external :: num_int2_grad1u2_grad2u2_j1b2
+
+  print*, ' test_int2_grad1u2_grad2u2_j1b2 ...'
+
+  PROVIDE int2_grad1u2_grad2u2_j1b2
+  ! eps_ij: per-element reporting threshold; acc_tot / normalz: summed |difference| and summed |i_num|
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  =     int2_grad1u2_grad2u2_j1b2(i,j,ipoint) 
+        i_num  = num_int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in int2_grad1u2_grad2u2_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_grad1u2_grad2u2_j1b2
+
+! ---
+
+subroutine test_int2_grad1_u12_ao()
+  ! Compares int2_grad1_u12_ao(i,j,ipoint,1:3) with integ(1:3) for all AO pairs and grid points.
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision :: integ(3)
+
+  print*, ' test_int2_grad1_u12_ao ...'
+
+  PROVIDE int2_grad1_u12_ao
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! integ is presumably filled by the call below (it is read right after); each component feeds acc_tot / normalz
+        call num_int2_grad1_u12_ao(i, j, ipoint, integ)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,1) 
+        i_num  = integ(1)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in x part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,2) 
+        i_num  = integ(2)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in y part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,3) 
+        i_num  = integ(3)
+        acc_ij = dabs(i_exc - i_num)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in z part of int2_grad1_u12_ao on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+        endif
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_int2_grad1_u12_ao
+
+! ---
+
+subroutine test_int2_u_grad1u_total_j1b2()
+  ! Debug check: r_m * int2_u_grad1u_j1b2 - int2_u_grad1u_x_j1b2(..,m) against integ(m) from num_int2_u_grad1u_total_j1b2.
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: val_exc, val_num, err_ij, err_sum, ref_sum, tol_ij
+  double precision :: x, y, z, ugrad_ij
+  double precision :: integ(3)
+
+  print*, ' test_int2_u_grad1u_total_j1b2 ...'
+
+  PROVIDE int2_u_grad1u_j1b2
+  PROVIDE int2_u_grad1u_x_j1b2
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! integ(1:3) is read below as the reference for this (i, j, ipoint)
+        call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
+        ugrad_ij = int2_u_grad1u_j1b2(i,j,ipoint)
+        val_exc = x * ugrad_ij - int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        val_num = integ(1)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in x part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+        ! --- y part
+        val_exc = y * ugrad_ij - int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        val_num = integ(2)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in y part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+        ! --- z part
+        val_exc = z * ugrad_ij - int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+        val_num = integ(3)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in z part of int2_u_grad1u_total_j1b2 on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  ! totals cover all three components and are printed without normalisation
+end subroutine test_int2_u_grad1u_total_j1b2
+
+! ---
+
+subroutine test_gradu_squared_u_ij_mu()
+  ! Debug check: compares gradu_squared_u_ij_mu with num_gradu_squared_u_ij_mu element by element.
+  implicit none
+  double precision, external :: num_gradu_squared_u_ij_mu
+  integer                    :: i, j, ipoint
+  double precision           :: val_exc, val_num, err_ij, err_sum, ref_sum, tol_ij
+
+  print*, ' test_gradu_squared_u_ij_mu ...'
+
+  PROVIDE gradu_squared_u_ij_mu
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! stored provider value vs. value returned by the external reference function
+        val_exc = gradu_squared_u_ij_mu(i,j,ipoint)
+        val_num = num_gradu_squared_u_ij_mu(i, j, ipoint)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in gradu_squared_u_ij_mu on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  ! totals are printed as accumulated, without normalisation
+end subroutine test_gradu_squared_u_ij_mu
+
+! ---
+
+subroutine test_grad12_j12()
+  ! Debug check: compares grad12_j12 with num_grad12_j12 element by element.
+  implicit none
+  double precision, external :: num_grad12_j12
+  integer                    :: i, j, ipoint
+  double precision           :: val_exc, val_num, err_ij, err_sum, ref_sum, tol_ij
+
+  print*, ' test_grad12_j12 ...'
+
+  PROVIDE grad12_j12
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! stored provider value vs. value returned by the external reference function
+        val_exc = grad12_j12(i,j,ipoint)
+        val_num = num_grad12_j12(i, j, ipoint)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in grad12_j12 on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        ! accumulate even when the element is below the threshold
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  ! totals are printed as accumulated, without normalisation
+end subroutine test_grad12_j12
+
+! ---
+
+subroutine test_u12sq_j1bsq()
+  ! Debug check: compares u12sq_j1bsq with num_u12sq_j1bsq element by element.
+  implicit none
+  double precision, external :: num_u12sq_j1bsq
+  integer                    :: i, j, ipoint
+  double precision           :: val_exc, val_num, err_ij, err_sum, ref_sum, tol_ij
+
+  print*, ' test_u12sq_j1bsq ...'
+
+  PROVIDE u12sq_j1bsq
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! stored provider value vs. value returned by the external reference function
+        val_exc = u12sq_j1bsq(i,j,ipoint)
+        val_num = num_u12sq_j1bsq(i, j, ipoint)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in u12sq_j1bsq on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        ! accumulate even when the element is below the threshold
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  ! totals are printed as accumulated, without normalisation
+end subroutine test_u12sq_j1bsq
+
+! ---
+
+subroutine test_u12_grad1_u12_j1b_grad1_j1b()
+  ! Debug check: compares u12_grad1_u12_j1b_grad1_j1b with num_u12_grad1_u12_j1b_grad1_j1b element by element.
+  implicit none
+  double precision, external :: num_u12_grad1_u12_j1b_grad1_j1b
+  integer                    :: i, j, ipoint
+  double precision           :: val_exc, val_num, err_ij, err_sum, ref_sum, tol_ij
+
+  print*, ' test_u12_grad1_u12_j1b_grad1_j1b ...'
+
+  PROVIDE u12_grad1_u12_j1b_grad1_j1b
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! stored provider value vs. value returned by the external reference function
+        val_exc = u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint)
+        val_num = num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint)
+        err_ij  = abs(val_exc - val_num)
+        if(err_ij > tol_ij) then
+          print *, ' problem in u12_grad1_u12_j1b_grad1_j1b on', i, j, ipoint
+          print *, ' analyt integ = ', val_exc
+          print *, ' numeri integ = ', val_num
+          print *, ' diff         = ', err_ij
+        endif
+        ! accumulate even when the element is below the threshold
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  ! totals are printed as accumulated, without normalisation
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao()
+  ! Debug check: overlap_gauss_r12_ao_v (all grid points per call) against overlap_gauss_r12_ao (one point per call).
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: val_ref, val_vec, err_ij, err_sum, ref_sum, tol_ij
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  ! only one exponent is tested: the first entry of expo_gauss_j_mu_x_2
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! --- values from overlap_gauss_r12_ao_v
+
+  allocate(int_fit_v(n_points_final_grid), I_vec(ao_num,ao_num,n_points_final_grid))
+  I_vec = 0.d0
+
+  ! int_fit_v is filled for the pair (i, j) and then copied into I_vec(j,i,:)
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- values from overlap_gauss_r12_ao
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        ! same (j,i) storage order as I_vec
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- element-wise comparison; the run stops at the first mismatch above tol_ij
+
+  tol_ij  = 1d-3
+  err_sum = 0.d0
+  ref_sum = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        val_ref = I_ref(i,j,ipoint)
+        val_vec = I_vec(i,j,ipoint)
+        err_ij  = abs(val_ref - val_vec)
+        !err_ij = abs(val_ref - val_vec) / abs(val_ref)
+        if(err_ij > tol_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', val_ref
+          print *, ' numeri integ = ', val_vec
+          print *, ' diff         = ', err_ij
+          stop
+        endif
+
+        err_sum = err_sum + err_ij
+        ref_sum = ref_sum + abs(val_vec)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', err_sum
+  print*, ' normalz = ', ref_sum
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+  ! Debug check: overlap_gauss_r12_ao_with1s_v (all grid points per call) against overlap_gauss_r12_ao_with1s (one point per call).
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  ! fixed test parameters: first fit exponent, and entry 2 of the List_all_comb_b3_* arrays for beta / B_center
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! --- values from overlap_gauss_r12_ao_with1s_v
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      ! int_fit_v is filled for the pair (i, j) and then copied into I_vec(j,i,:)
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- values from overlap_gauss_r12_ao_with1s
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+        ! same (j,i) storage order as I_vec
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- element-wise comparison; the run stops at the first mismatch above eps_ij
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_with1s_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao_with1s
+
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
new file mode 100644
index 00000000..ff3d11f3
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -0,0 +1,437 @@
+
+! ---
+
+!       TODO : strong optimization : write the loops in a different way
+!            : so that, for each pair of AOs, the Gaussian product is computed once and for all
+
+BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_points_final_grid) ]
+
+  BEGIN_DOC
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! gradu_squared_u_ij_mu = -0.50 x \int dr2 [ (grad_1 u12)^2 + (grad_2 u12)^2 ] \phi_i(2) \phi_j(2)
+  !                       = -0.25 x \int dr2       (1 - erf(mu*r12))^2            \phi_i(2) \phi_j(2)
+  ! and
+  !   (1 - erf(mu*r12))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * r12^2)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! gradu_squared_u_ij_mu = -0.50 x \int dr2 \phi_i(2) \phi_j(2) [ v1^2 v2^2 ((grad_1 u12)^2 + (grad_2 u12)^2) + u12^2 v2^2 (grad_1 v1)^2 + 2 u12 v1 v2^2 (grad_1 u12) . (grad_1 v1) ]
+  !                       = -0.25 x        v1^2      \int dr2 \phi_i(2) \phi_j(2) [1 - erf(mu r12)]^2 v2^2
+  !                       + -0.50 x    (grad_1 v1)^2 \int dr2 \phi_i(2) \phi_j(2)      u12^2          v2^2
+  !                       + -1.00 x v1 (grad_1 v1) . \int dr2 \phi_i(2) \phi_j(2)  u12 (grad_1 u12)   v2^2
+  !                       =                 v1^2        x   int2_grad1u2_grad2u2_j1b2
+  !                       + -0.5 x      (grad_1 v1)^2   x   int2_u2_j1b2
+  !                       + -1.0 x v1 x (grad_1 v1)   \cdot [ int2_u_grad1u_j1b2 x r - int2_u_grad1u_x_j1b2 ]
+  !
+  ! The second form is used when j1b_type == 3, the first (Gaussian fit) form otherwise.
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j, igauss
+  double precision           :: x, y, z, r(3), delta, coef
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing gradu_squared_u_ij_mu ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+    ! u12 x v1 x v2 form: combine stored integrals with v_1b and v_1b_grad at each grid point
+    do ipoint = 1, n_points_final_grid
+
+      x     = final_grid_points(1,ipoint)
+      y     = final_grid_points(2,ipoint)
+      z     = final_grid_points(3,ipoint)
+      tmp_v = v_1b       (ipoint)
+      tmp_x = v_1b_grad(1,ipoint)
+      tmp_y = v_1b_grad(2,ipoint)
+      tmp_z = v_1b_grad(3,ipoint)
+      ! tmp1 = v^2 ; tmp2 = -0.5 |grad v|^2 ; tmp3..tmp5 = v * grad v (x, y, z components)
+      tmp1 = tmp_v * tmp_v
+      tmp2 = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+      tmp3 = tmp_v * tmp_x
+      tmp4 = tmp_v * tmp_y
+      tmp5 = tmp_v * tmp_z
+      ! tmp6..tmp8 = -(grid coordinate) * v * grad v : the factors multiplying int2_u_grad1u_j1b2
+      tmp6 = -x * tmp3
+      tmp7 = -y * tmp4
+      tmp8 = -z * tmp5
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+
+          gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)            &
+                                            + tmp2 * int2_u2_j1b2             (i,j,ipoint)            &
+                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+
+  else
+    ! u12-only form: sum over the n_max_fit_slat fitted Gaussians, each term weighted by -0.25 * coef
+    gradu_squared_u_ij_mu = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            gradu_squared_u_ij_mu(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for gradu_squared_u_ij_mu = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: ipoint, i, j, k, l
+!  double precision              :: weight1, ao_ik_r, ao_i_r
+!  double precision, allocatable :: ac_mat(:,:,:,:)
+!
+!  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+!  ac_mat = 0.d0
+!
+!  do ipoint = 1, n_points_final_grid
+!    weight1 = final_weight_at_r_vector(ipoint)
+!
+!    do i = 1, ao_num
+!      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+!
+!      do k = 1, ao_num
+!        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+!
+!        do j = 1, ao_num
+!          do l = 1, ao_num
+!            ac_mat(k,i,l,j) += ao_ik_r * gradu_squared_u_ij_mu(l,j,ipoint)
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+!
+!  do j = 1, ao_num
+!    do l = 1, ao_num
+!      do i = 1, ao_num
+!        do k = 1, ao_num
+!          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+!          !write(11,*) tc_grad_square_ao_loop(k,i,l,j)
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+!
+!  deallocate(ac_mat)
+!
+!END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Explicit-loop grid quadrature of
+  ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  ! assembled from u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b and grad12_j12.
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: wgt_r, wao_i_r, wao_ik_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_square_ao_loop ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), bc_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+  bc_mat = 0.d0
+  ! ac_mat collects the u12sq_j1bsq + u12_grad1_u12_j1b_grad1_j1b part, bc_mat the grad12_j12 part
+
+  do ipoint = 1, n_points_final_grid
+    wgt_r = final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      !wao_i_r = wgt_r * aos_in_r_array_transp(ipoint,i)
+      wao_i_r = wgt_r * aos_in_r_array(i,ipoint)
+
+      do k = 1, ao_num
+        !wao_ik_r = wao_i_r * aos_in_r_array_transp(ipoint,k)
+        wao_ik_r = wao_i_r * aos_in_r_array(k,ipoint)
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+            ac_mat(k,i,l,j) = ac_mat(k,i,l,j) + wao_ik_r * ( u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) )
+            bc_mat(k,i,l,j) = bc_mat(k,i,l,j) + wao_ik_r * grad12_j12(l,j,ipoint)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  ! ac_mat enters symmetrised under (k,i) <-> (l,j); bc_mat is added once, as accumulated
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! release both work arrays
+  deallocate(ac_mat, bc_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! j1b_type == 3 : grad12_j12(i,j,ipoint) = v_1b(ipoint)^2 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+  ! otherwise     : grad12_j12(i,j,ipoint) = -0.25 * \sum_g coef_gauss_1_erf_x_2(g) * overlap_gauss_r12_ao(r_ipoint, expo_gauss_1_erf_x_2(g), i, j)
+  END_DOC
+  implicit none
+  integer                    :: ipoint, i, j, igauss
+  double precision           :: r(3), delta, coef, tmp1, time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  print*, ' providing grad12_j12 ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+    ! scale the stored integral by the square of v_1b at the grid point
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+    ! accumulate the n_max_fit_slat fitted Gaussians, r being the current grid point
+    grad12_j12 = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12 = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! u12sq_j1bsq(i,j,ipoint) = -0.5 * |v_1b_grad(:,ipoint)|^2 * int2_u2_j1b2(i,j,ipoint)
+  END_DOC
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: gv_x, gv_y, gv_z, pref_ip, time0, time1
+
+  print*, ' providing u12sq_j1bsq ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    gv_x    = v_1b_grad(1,ipoint)
+    gv_y    = v_1b_grad(2,ipoint)
+    gv_z    = v_1b_grad(3,ipoint)
+    pref_ip = -0.5d0 * (gv_x * gv_x + gv_y * gv_y + gv_z * gv_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq(i,j,ipoint) = pref_ip * int2_u2_j1b2(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = - v_1b(ipoint) * \sum_{m=1,3} v_1b_grad(m,ipoint)
+  !       * [ final_grid_points(m,ipoint) * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,m) ]
+  END_DOC
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: x, y, z, tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, time0, time1
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    ! tmp3..tmp5 = v * grad v (x, y, z components)
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+    ! tmp6..tmp8 = -(grid coordinate) * v * grad v : the factors multiplying int2_u_grad1u_j1b2
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Grid quadrature, evaluated with a single dgemm, of
+  ! tc_grad_square_ao(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  ! NOTE(review): u12sq_j1bsq and grad12_j12 carry negative prefactors (-0.5, -0.25) - confirm the overall sign.
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  ! b_mat(ipoint,k,i): grid weight times the two aos_in_r_array_transp entries at that point.
+  ! Every element is assigned in the loop below, so no zero fill is needed beforehand.
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) &
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! tmp(l,j,ipoint): sum of the three integrands (grad12_j12 with weight 0.5); fully assigned below
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) &
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! ac_mat(l,j,k,i) = \sum_ipoint tmp(l,j,ipoint) * b_mat(ipoint,k,i)  (beta = 1, hence the zero fill of ac_mat)
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+  ! symmetrise under (k,i) <-> (l,j)
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) &
+ !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao = ', time1 - time0
+
+END_PROVIDER
+
+! ---
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..180c9588
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,221 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Grid quadrature, evaluated with a single dgemm from the *_test integrands, of
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test ...'
+  call wall_time(time0)
+
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+
+  ! b_mat(ipoint,k,i): grid weight times the two aos_in_r_array_transp entries at that point.
+  ! Every element is assigned in the loop below, so no zero fill is needed beforehand.
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) &
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! tmp(l,j,ipoint): sum of the three integrands (grad12_j12_test with weight 0.5); fully assigned below
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) &
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! ac_mat(l,j,k,i) = \sum_ipoint tmp(l,j,ipoint) * b_mat(ipoint,k,i)  (beta = 1, hence the zero fill of ac_mat)
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+  ! symmetrise under (k,i) <-> (l,j)
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) &
+ !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! u12sq_j1bsq_test(i,j,ipoint) = -0.5 * |v_1b_grad(:,ipoint)|^2 * int2_u2_j1b2_test(i,j,ipoint)
+  END_DOC
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: gv_x, gv_y, gv_z, pref_ip, time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    gv_x    = v_1b_grad(1,ipoint)
+    gv_y    = v_1b_grad(2,ipoint)
+    gv_z    = v_1b_grad(3,ipoint)
+    pref_ip = -0.5d0 * (gv_x * gv_x + gv_y * gv_y + gv_z * gv_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = pref_ip * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = - v_1b(ipoint) * \sum_{m=1,3} v_1b_grad(m,ipoint)
+  !       * [ final_grid_points(m,ipoint) * int2_u_grad1u_j1b2_test(i,j,ipoint) - int2_u_grad1u_x_j1b2_test(i,j,ipoint,m) ]
+  END_DOC
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: x, y, z, tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, time0, time1
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+
+  provide int2_u_grad1u_x_j1b2_test
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    ! tmp3..tmp5 = v * grad v (x, y, z components)
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+    ! tmp6..tmp8 = -(grid coordinate) * v * grad v : the factors multiplying int2_u_grad1u_j1b2_test
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! j1b_type == 3: v_1b(ipoint)^2 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint); otherwise: -0.25 * \sum_g coef_g * overlap_gauss_r12_ao(r, expo_g, i, j)
+  END_DOC
+  implicit none
+  integer                    :: ipoint, i, j, igauss
+  double precision           :: r(3), delta, coef, tmp1, time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  provide int2_grad1u2_grad2u2_j1b2_test
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+    ! scale the stored integral by the square of v_1b at the grid point
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+    ! accumulate the n_max_fit_slat fitted Gaussians (coef_gauss_1_erf_x_2 / expo_gauss_1_erf_x_2), r being the grid point
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/non_h_ints_mu/grad_tc_int.irp.f b/src/non_h_ints_mu/grad_tc_int.irp.f
new file mode 100644
index 00000000..cb3b71a3
--- /dev/null
+++ b/src/non_h_ints_mu/grad_tc_int.irp.f
@@ -0,0 +1,217 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_non_hermit_term_chemist, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !                            1 1 2 2      1 2                                1 2 
+  !
+  ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis
+  ! Built as ac_mat(k,i,l,j) + ac_mat(l,j,k,i), where ac_mat is accumulated by two batches of DGEMM over the grid points.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, ipoint, m
+  double precision              :: weight1, r(3)
+  double precision              :: wall1, wall0
+  double precision, allocatable :: b_mat(:,:,:,:), ac_mat(:,:,:,:)
+  ! the two grid-integral arrays contracted by the DGEMM calls below are provided before the timer starts
+  provide v_ij_erf_rk_cst_mu x_v_ij_erf_rk_cst_mu
+
+  call wall_time(wall0)
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ! first fill: b_mat(ipoint,k,i,m) = 1/2 * weight(ipoint) * r_m(ipoint) * phi_k(ipoint) * d_m phi_i(ipoint)
+ !$OMP PARALLEL                         &
+ !$OMP DEFAULT (NONE)                   &
+ !$OMP PRIVATE (i,k,m,ipoint,r,weight1) & 
+ !$OMP SHARED (aos_in_r_array_transp,aos_grad_in_r_array_transp_bis,b_mat)& 
+ !$OMP SHARED (ao_num,n_points_final_grid,final_grid_points,final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do m = 1, 3
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          r(1) = final_grid_points(1,ipoint)
+          r(2) = final_grid_points(2,ipoint)
+          r(3) = final_grid_points(3,ipoint)
+          weight1 = final_weight_at_r_vector(ipoint)
+          b_mat(ipoint,k,i,m) = 0.5d0 * aos_in_r_array_transp(ipoint,k) * r(m) * weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,m) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! (A)                b_mat(ipoint,k,i,m) X v_ij_erf_rk_cst_mu(j,l,r1)
+  ! 1/2 \int dr1 x1 phi_k(1) d/dx1 phi_i(1) \int dr2 (1 - erf(mu_r12))/r12  phi_j(2) phi_l(2)
+  ac_mat = 0.d0
+  do m = 1, 3
+    ! dgemm(transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc): neither operand is transposed, beta = 1 accumulates
+    ! ac_mat(a,b,k,i) += sum_ipoint v_ij_erf_rk_cst_mu(a,b,ipoint) * b_mat(ipoint,k,i,m)
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0             &
+              , v_ij_erf_rk_cst_mu(1,1,1), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+  ! second fill: same b_mat as above but without the r(m) factor
+ !$OMP PARALLEL                       &
+ !$OMP DEFAULT (NONE)                 &
+ !$OMP PRIVATE (i,k,m,ipoint,weight1) & 
+ !$OMP SHARED (aos_in_r_array_transp,aos_grad_in_r_array_transp_bis,b_mat,ao_num,n_points_final_grid,final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do m = 1, 3
+    do i = 1, ao_num
+      do k = 1, ao_num
+        do ipoint = 1, n_points_final_grid
+          weight1 = final_weight_at_r_vector(ipoint)
+          b_mat(ipoint,k,i,m) = 0.5d0 * aos_in_r_array_transp(ipoint,k) * weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,m) 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ ! (B)                b_mat(ipoint,k,i,m) X x_v_ij_erf_rk_cst_mu(j,l,r1,m)
+ ! 1/2 \int dr1 phi_k(1) d/dx1 phi_i(1) \int dr2 x2(1 - erf(mu_r12))/r12  phi_j(2) phi_l(2)
+  do m = 1, 3
+   ! same dgemm layout as in (A); alpha = -1 subtracts this contribution from ac_mat
+   ! ac_mat(a,b,k,i) -= sum_ipoint x_v_ij_erf_rk_cst_mu(a,b,ipoint,m) * b_mat(ipoint,k,i,m)
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, -1.d0                &
+              , x_v_ij_erf_rk_cst_mu(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+  enddo
+  ! final result: ac_mat plus the same array with the index pairs (k,i) and (l,j) exchanged
+ !$OMP PARALLEL          &
+ !$OMP DEFAULT (NONE)    &
+ !$OMP PRIVATE (i,k,j,l) & 
+ !$OMP SHARED (ac_mat,ao_non_hermit_term_chemist,ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          !                          (ki|lj)           (ki|lj)           (lj|ki)
+          ao_non_hermit_term_chemist(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)    
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time dgemm ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO :: optimization :: transform into DGEMM
+
+BEGIN_PROVIDER [double precision, mo_non_hermit_term_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !                            1 1 2 2      1 2                                1 2 
+  !
+  ! mo_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the MO basis
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, a1, a2, a3, a4
+  double precision, allocatable :: t_kaaa(:,:,:,:), t_kiaa(:,:,:,:), t_kila(:,:,:,:)
+
+  allocate(t_kaaa(mo_num,ao_num,ao_num,ao_num))
+  t_kaaa = 0.d0
+  ! step 1 : first AO index a1 -> MO index k
+  do a4 = 1, ao_num
+    do a3 = 1, ao_num
+      do a2 = 1, ao_num
+        do a1 = 1, ao_num
+          do k = 1, mo_num
+            ! (k a2|a3 a4) = sum_a1 c_{a1,k} * (a1 a2|a3 a4)
+            t_kaaa(k,a2,a3,a4) += mo_coef_transp(k,a1) * ao_non_hermit_term_chemist(a1,a2,a3,a4)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  free ao_non_hermit_term_chemist 
+
+  allocate(t_kiaa(mo_num,mo_num,ao_num,ao_num))
+  t_kiaa = 0.d0
+  ! step 2 : second AO index a2 -> MO index i
+  do a4 = 1, ao_num
+    do a3 = 1, ao_num
+      do a2 = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            ! (k i|a3 a4) = sum_a2 c_{a2,i} * (k a2|a3 a4)
+            t_kiaa(k,i,a3,a4) += mo_coef_transp(i,a2) * t_kaaa(k,a2,a3,a4)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(t_kaaa)
+
+  allocate(t_kila(mo_num,mo_num,mo_num,ao_num))
+  t_kila = 0.d0
+  ! step 3 : third AO index a3 -> MO index l, (k i|l a4) = sum_a3 c_{a3,l} * (k i|a3 a4)
+  do a4 = 1, ao_num
+    do a3 = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            t_kila(k,i,l,a4) += mo_coef_transp(l,a3) * t_kiaa(k,i,a3,a4)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(t_kiaa)
+  ! step 4 : last AO index a4 -> MO index j, (k i|l j) = sum_a4 c_{a4,j} * (k i|l a4)
+  mo_non_hermit_term_chemist = 0.d0
+  do a4 = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_non_hermit_term_chemist(k,i,l,j) += mo_coef_transp(j,a4) * t_kila(k,i,l,a4)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(t_kila)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_non_hermit_term, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !                    1 2 1 2      1 2                                1 2 
+  !
+  ! mo_non_hermit_term(k,l,i,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the MO basis
+  END_DOC
+
+  implicit none
+  integer :: i, j, l
+
+  ! Pure index reordering of mo_non_hermit_term_chemist: (k,i,l,j) -> (k,l,i,j).
+  ! The first index runs over its full range, so it is copied as an array section.
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        mo_non_hermit_term(1:mo_num,l,i,j) = mo_non_hermit_term_chemist(1:mo_num,i,l,j)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
new file mode 100644
index 00000000..a515e0b8
--- /dev/null
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -0,0 +1,640 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b, (n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_1b(ipoint) = \prod_{A=1}^{nucl_num} [ 1 - exp(-j1b_pen(A) * |r(ipoint) - R_A|^2) ],  R_A = nucl_coord(A,1:3)
+  END_DOC
+
+  implicit none
+  integer          :: ip, inucl
+  double precision :: rx, ry, rz, dx, dy, dz
+  double precision :: alpha, dist2, prod_nucl
+
+  do ip = 1, n_points_final_grid
+    rx = final_grid_points(1,ip)
+    ry = final_grid_points(2,ip)
+    rz = final_grid_points(3,ip)
+    ! running product over the nuclei
+    prod_nucl = 1.d0
+    do inucl = 1, nucl_num
+      alpha = j1b_pen(inucl)
+      dx    = rx - nucl_coord(inucl,1)
+      dy    = ry - nucl_coord(inucl,2)
+      dz    = rz - nucl_coord(inucl,3)
+      dist2 = dx*dx + dy*dy + dz*dz
+      prod_nucl = prod_nucl * (1.d0 - dexp(-alpha*dist2))
+    enddo
+    v_1b(ip) = prod_nucl
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_grad, (3, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_1b_grad(:,ipoint) = sum over the List_all_comb_b2_size combinations i of
+  !   -2 * (-1)^phase_i * exp(-sum_j a_ij |r - R_j|^2) * sum_j a_ij (r - R_j),   r = r(ipoint), R_j = nucl_coord(j,:)
+  ! with a_ij = List_all_comb_b2(j,i) * j1b_pen(j) and phase_i = sum_j List_all_comb_b2(j,i)
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, phase
+  double precision :: x, y, z, dx, dy, dz, a, e
+  double precision :: fact_x, fact_y, fact_z
+  double precision :: ax_der, ay_der, az_der, a_expo
+
+  do ipoint = 1, n_points_final_grid
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+    fact_x = 0.d0
+    fact_y = 0.d0
+    fact_z = 0.d0
+    do i = 1, List_all_comb_b2_size
+      phase  = 0
+      a_expo = 0.d0
+      ax_der = 0.d0
+      ay_der = 0.d0
+      az_der = 0.d0
+      do j = 1, nucl_num
+        a  = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+        dx = x - nucl_coord(j,1)
+        dy = y - nucl_coord(j,2)
+        dz = z - nucl_coord(j,3)
+        phase  += List_all_comb_b2(j,i)
+        a_expo += a * (dx*dx + dy*dy + dz*dz)
+        ax_der += a * dx
+        ay_der += a * dy
+        az_der += a * dz
+      enddo
+      ! integer exponent on purpose: a negative real base raised to a REAL power is prohibited by the Fortran standard
+      e = -2.d0 * (-1.d0)**phase * dexp(-a_expo)
+      fact_x += e * ax_der 
+      fact_y += e * ay_der 
+      fact_z += e * az_der 
+    enddo
+    v_1b_grad(1,ipoint) = fact_x
+    v_1b_grad(2,ipoint) = fact_y
+    v_1b_grad(3,ipoint) = fact_z
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_lapl, (n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_1b_lapl(ipoint) = sum over the List_all_comb_b2_size combinations i of
+  !   (-1)^phase_i * [ -6 b_i + 4 |sum_j a_ij (r - R_j)|^2 ] * exp(-sum_j a_ij |r - R_j|^2),   r = r(ipoint)
+  ! with a_ij = List_all_comb_b2(j,i) * j1b_pen(j), b_i = sum_j a_ij, phase_i = sum_j List_all_comb_b2(j,i)
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j, phase
+  double precision :: x, y, z, dx, dy, dz
+  double precision :: a, b, fact_r
+  double precision :: ax_der, ay_der, az_der, a_expo
+
+  do ipoint = 1, n_points_final_grid
+    x = final_grid_points(1,ipoint)
+    y = final_grid_points(2,ipoint)
+    z = final_grid_points(3,ipoint)
+    fact_r = 0.d0
+    do i = 1, List_all_comb_b2_size
+      phase  = 0
+      b      = 0.d0
+      a_expo = 0.d0
+      ax_der = 0.d0
+      ay_der = 0.d0
+      az_der = 0.d0
+      do j = 1, nucl_num
+        a  = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
+        dx = x - nucl_coord(j,1)
+        dy = y - nucl_coord(j,2)
+        dz = z - nucl_coord(j,3)
+        phase  += List_all_comb_b2(j,i)
+        b      += a
+        a_expo += a * (dx*dx + dy*dy + dz*dz)
+        ax_der += a * dx
+        ay_der += a * dy
+        az_der += a * dz
+      enddo
+      ! integer exponent on purpose: a negative real base raised to a REAL power is prohibited by the Fortran standard
+      fact_r += (-1.d0)**phase * (-6.d0*b + 4.d0*(ax_der*ax_der + ay_der*ay_der + az_der*az_der) ) * dexp(-a_expo)
+    enddo
+    v_1b_lapl(ipoint) = fact_r
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_list_b2, (n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_1b_list_b2(ipoint) = \sum_i List_all_comb_b2_coef(i) * exp(-List_all_comb_b2_expo(i) * |r(ipoint) - List_all_comb_b2_cent(:,i)|^2)
+  END_DOC
+
+  implicit none
+  integer          :: ip, icomb
+  double precision :: rx, ry, rz, c_i, e_i, dx, dy, dz
+  double precision :: acc
+
+  PROVIDE List_all_comb_b2_coef List_all_comb_b2_expo List_all_comb_b2_cent
+
+  do ip = 1, n_points_final_grid
+    rx = final_grid_points(1,ip)
+    ry = final_grid_points(2,ip)
+    rz = final_grid_points(3,ip)
+
+    acc = 0.d0
+    do icomb = 1, List_all_comb_b2_size
+      c_i = List_all_comb_b2_coef(icomb)
+      e_i = List_all_comb_b2_expo(icomb) 
+      dx  = rx - List_all_comb_b2_cent(1,icomb)
+      dy  = ry - List_all_comb_b2_cent(2,icomb)
+      dz  = rz - List_all_comb_b2_cent(3,icomb)
+      acc += c_i * dexp(-e_i * (dx*dx + dy*dy + dz*dz))
+    enddo
+
+    v_1b_list_b2(ip) = acc
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_1b_list_b3, (n_points_final_grid)]
+
+  BEGIN_DOC
+  ! v_1b_list_b3(ipoint) = \sum_i List_all_comb_b3_coef(i) * exp(-List_all_comb_b3_expo(i) * |r(ipoint) - List_all_comb_b3_cent(:,i)|^2)
+  END_DOC
+
+  implicit none
+  integer          :: ip, icomb
+  double precision :: rx, ry, rz, c_i, e_i, dx, dy, dz
+  double precision :: acc
+
+  PROVIDE List_all_comb_b3_coef List_all_comb_b3_expo List_all_comb_b3_cent
+
+  do ip = 1, n_points_final_grid
+    rx = final_grid_points(1,ip)
+    ry = final_grid_points(2,ip)
+    rz = final_grid_points(3,ip)
+
+    acc = 0.d0
+    do icomb = 1, List_all_comb_b3_size
+      c_i = List_all_comb_b3_coef(icomb)
+      e_i = List_all_comb_b3_expo(icomb) 
+      dx  = rx - List_all_comb_b3_cent(1,icomb)
+      dy  = ry - List_all_comb_b3_cent(2,icomb)
+      dz  = rz - List_all_comb_b3_cent(3,icomb)
+      acc += c_i * dexp(-e_i * (dx*dx + dy*dy + dz*dz))
+    enddo
+
+    v_1b_list_b3(ip) = acc
+  enddo
+
+END_PROVIDER
+
+! ---
+
+double precision function jmu_modif(r1, r2)
+  ! Product of j12_mu(r1,r2) and j12_nucl(r1,r2).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, external   :: j12_mu, j12_nucl
+  double precision             :: u12, v12
+  u12 = j12_mu(r1, r2)
+  v12 = j12_nucl(r1, r2)
+  jmu_modif = u12 * v12
+end function jmu_modif
+
+! ---
+
+double precision function j12_mu(r1, r2)
+  ! j12_mu = 1/2 r12 (1 - erf(mu_erf r12)) - inv_sq_pi_2 exp(-(mu_erf r12)^2) / mu_erf,  with r12 = |r1 - r2|
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision             :: dx, dy, dz, dist, arg
+
+  dx   = r1(1) - r2(1)
+  dy   = r1(2) - r2(2)
+  dz   = r1(3) - r2(3)
+  dist = dsqrt( dx * dx + dy * dy + dz * dz )
+  arg  = mu_erf * dist
+
+  j12_mu = 0.5d0 * dist * (1.d0 - derf(arg)) - inv_sq_pi_2 * dexp(-arg*arg) / mu_erf
+
+end function j12_mu
+
+! ---
+
+double precision function j12_mu_r12(r12)
+  ! Same expression as j12_mu, taking the inter-electronic distance r12 directly.
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: arg, erf_part, gauss_part
+
+  arg        = mu_erf * r12
+  erf_part   = 0.5d0 * r12 * (1.d0 - derf(arg))
+  gauss_part = inv_sq_pi_2 * dexp(-arg*arg) / mu_erf
+
+  j12_mu_r12 = erf_part - gauss_part
+end function j12_mu_r12
+
+! ---
+
+double precision function j12_mu_gauss(r1, r2)
+
+  ! Sum of n_max_fit_slat Gaussians in the SQUARED distance |r1 - r2|^2,
+  ! with coefficients coef_gauss_j_mu_x and exponents expo_gauss_j_mu_x.
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  integer                      :: ig
+  double precision             :: dx, dy, dz, r12_sq
+
+  dx     = r1(1) - r2(1)
+  dy     = r1(2) - r2(2)
+  dz     = r1(3) - r2(3)
+  r12_sq = dx * dx + dy * dy + dz * dz
+
+  j12_mu_gauss = 0.d0
+  do ig = 1, n_max_fit_slat
+    j12_mu_gauss += coef_gauss_j_mu_x(ig) * dexp(-expo_gauss_j_mu_x(ig)*r12_sq)
+  enddo
+
+  return
+end function j12_mu_gauss
+
+! ---
+
+double precision function j1b_nucl(r)
+
+  ! j1b_nucl(r) = \prod_{A=1}^{nucl_num} [ 1 - exp(-j1b_pen(A) * |r - R_A|^2) ]
+  ! with R_A = nucl_coord(A,1:3).
+  implicit none
+  double precision, intent(in) :: r(3)
+  integer                      :: inucl
+  double precision             :: alpha, dx, dy, dz, dist2
+
+  j1b_nucl = 1.d0
+  do inucl = 1, nucl_num
+    alpha = j1b_pen(inucl)
+    dx    = r(1) - nucl_coord(inucl,1)
+    dy    = r(2) - nucl_coord(inucl,2)
+    dz    = r(3) - nucl_coord(inucl,3)
+    dist2 = dx * dx + dy * dy + dz * dz
+    j1b_nucl = j1b_nucl * (1.d0 - exp(-alpha*dist2))
+  enddo
+
+  return
+end function j1b_nucl
+
+! ---
+
+double precision function j12_nucl(r1, r2)
+  ! Product of the one-body factor j1b_nucl evaluated at r1 and at r2.
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, external   :: j1b_nucl
+  double precision             :: v_r1, v_r2
+  v_r1 = j1b_nucl(r1)
+  v_r2 = j1b_nucl(r2)
+  j12_nucl = v_r1 * v_r2
+end function j12_nucl
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad_x_j1b_nucl(r)
+  ! Central finite-difference estimate of d j1b_nucl / dx at r.
+  ! Step: max(1d-6, 1d-6*|r(1)|).
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision, parameter  :: h_rel = 1d-6
+  double precision             :: r_fwd(3), r_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j1b_nucl
+
+  step = max(h_rel, dabs(h_rel*r(1)))
+
+  r_fwd    = r
+  r_fwd(1) = r_fwd(1) + step
+  r_bwd    = r_fwd
+  r_bwd(1) = r_bwd(1) - 2.d0 * step
+
+  f_fwd = j1b_nucl(r_fwd)
+  f_bwd = j1b_nucl(r_bwd)
+  grad_x_j1b_nucl = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad_x_j1b_nucl
+
+double precision function grad_y_j1b_nucl(r)
+  ! Central finite-difference estimate of d j1b_nucl / dy at r.
+  ! Step: max(1d-6, 1d-6*|r(2)|).
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision, parameter  :: h_rel = 1d-6
+  double precision             :: r_fwd(3), r_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j1b_nucl
+
+  step = max(h_rel, dabs(h_rel*r(2)))
+
+  r_fwd    = r
+  r_fwd(2) = r_fwd(2) + step
+  r_bwd    = r_fwd
+  r_bwd(2) = r_bwd(2) - 2.d0 * step
+
+  f_fwd = j1b_nucl(r_fwd)
+  f_bwd = j1b_nucl(r_bwd)
+  grad_y_j1b_nucl = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad_y_j1b_nucl
+
+double precision function grad_z_j1b_nucl(r)
+  ! Central finite-difference estimate of d j1b_nucl / dz at r.
+  ! Step: max(1d-6, 1d-6*|r(3)|).
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision, parameter  :: h_rel = 1d-6
+  double precision             :: r_fwd(3), r_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j1b_nucl
+
+  step = max(h_rel, dabs(h_rel*r(3)))
+
+  r_fwd    = r
+  r_fwd(3) = r_fwd(3) + step
+  r_bwd    = r_fwd
+  r_bwd(3) = r_bwd(3) - 2.d0 * step
+
+  f_fwd = j1b_nucl(r_fwd)
+  f_bwd = j1b_nucl(r_bwd)
+  grad_z_j1b_nucl = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad_z_j1b_nucl
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+double precision function lapl_j1b_nucl(r)
+
+  ! Numerical Laplacian of j1b_nucl at r: sum over x, y, z of the central finite
+  ! difference of the matching grad_{x,y,z}_j1b_nucl function.
+  ! Step per axis: max(1d-5, 1d-5*|r(axis)|).
+  ! The shifted coordinate is moved back (+step) before the next axis is processed.
+
+  implicit none
+  double precision, intent(in) :: r(3)
+  double precision             :: r_shift(3), h_rel, g_fwd, g_bwd, step
+  double precision, external   :: grad_x_j1b_nucl
+  double precision, external   :: grad_y_j1b_nucl
+  double precision, external   :: grad_z_j1b_nucl
+
+  h_rel   = 1d-5
+  r_shift = r
+
+  lapl_j1b_nucl = 0.d0
+
+  ! x axis
+  step       = max(h_rel, dabs(h_rel*r(1)))
+  r_shift(1) = r_shift(1) + step
+  g_fwd      = grad_x_j1b_nucl(r_shift)
+  r_shift(1) = r_shift(1) - 2.d0 * step
+  g_bwd      = grad_x_j1b_nucl(r_shift)
+  r_shift(1) = r_shift(1) + step
+
+  lapl_j1b_nucl += 0.5d0 * (g_fwd - g_bwd) / step
+
+  ! y axis
+  step       = max(h_rel, dabs(h_rel*r(2)))
+  r_shift(2) = r_shift(2) + step
+  g_fwd      = grad_y_j1b_nucl(r_shift)
+  r_shift(2) = r_shift(2) - 2.d0 * step
+  g_bwd      = grad_y_j1b_nucl(r_shift)
+  r_shift(2) = r_shift(2) + step
+
+  lapl_j1b_nucl += 0.5d0 * (g_fwd - g_bwd) / step
+
+  ! z axis
+  step       = max(h_rel, dabs(h_rel*r(3)))
+  r_shift(3) = r_shift(3) + step
+  g_fwd      = grad_z_j1b_nucl(r_shift)
+  r_shift(3) = r_shift(3) - 2.d0 * step
+  g_bwd      = grad_z_j1b_nucl(r_shift)
+  r_shift(3) = r_shift(3) + step
+
+  lapl_j1b_nucl += 0.5d0 * (g_fwd - g_bwd) / step
+
+  return
+end function lapl_j1b_nucl
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad1_x_jmu_modif(r1, r2)
+  ! Central finite-difference estimate of d jmu_modif(r1,r2) / d x1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(1)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: jmu_modif
+
+  step = max(h_rel, dabs(h_rel*r1(1)))
+
+  r1_fwd    = r1
+  r1_fwd(1) = r1_fwd(1) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(1) = r1_bwd(1) - 2.d0 * step
+
+  f_fwd = jmu_modif(r1_fwd, r2)
+  f_bwd = jmu_modif(r1_bwd, r2)
+  grad1_x_jmu_modif = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_x_jmu_modif
+
+double precision function grad1_y_jmu_modif(r1, r2)
+  ! Central finite-difference estimate of d jmu_modif(r1,r2) / d y1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(2)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: jmu_modif
+
+  step = max(h_rel, dabs(h_rel*r1(2)))
+
+  r1_fwd    = r1
+  r1_fwd(2) = r1_fwd(2) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(2) = r1_bwd(2) - 2.d0 * step
+
+  f_fwd = jmu_modif(r1_fwd, r2)
+  f_bwd = jmu_modif(r1_bwd, r2)
+  grad1_y_jmu_modif = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_y_jmu_modif
+
+double precision function grad1_z_jmu_modif(r1, r2)
+  ! Central finite-difference estimate of d jmu_modif(r1,r2) / d z1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(3)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: jmu_modif
+
+  step = max(h_rel, dabs(h_rel*r1(3)))
+
+  r1_fwd    = r1
+  r1_fwd(3) = r1_fwd(3) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(3) = r1_bwd(3) - 2.d0 * step
+
+  f_fwd = jmu_modif(r1_fwd, r2)
+  f_bwd = jmu_modif(r1_bwd, r2)
+  grad1_z_jmu_modif = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_z_jmu_modif
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+! ---------------------------------------------------------------------------------------
+
+double precision function grad1_x_j12_mu_num(r1, r2)
+  ! Central finite-difference estimate of d j12_mu(r1,r2) / d x1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(1)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j12_mu
+
+  step = max(h_rel, dabs(h_rel*r1(1)))
+
+  r1_fwd    = r1
+  r1_fwd(1) = r1_fwd(1) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(1) = r1_bwd(1) - 2.d0 * step
+
+  f_fwd = j12_mu(r1_fwd, r2)
+  f_bwd = j12_mu(r1_bwd, r2)
+  grad1_x_j12_mu_num = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_x_j12_mu_num
+
+double precision function grad1_y_j12_mu_num(r1, r2)
+  ! Central finite-difference estimate of d j12_mu(r1,r2) / d y1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(2)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j12_mu
+
+  step = max(h_rel, dabs(h_rel*r1(2)))
+
+  r1_fwd    = r1
+  r1_fwd(2) = r1_fwd(2) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(2) = r1_bwd(2) - 2.d0 * step
+
+  f_fwd = j12_mu(r1_fwd, r2)
+  f_bwd = j12_mu(r1_bwd, r2)
+  grad1_y_j12_mu_num = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_y_j12_mu_num
+
+double precision function grad1_z_j12_mu_num(r1, r2)
+  ! Central finite-difference estimate of d j12_mu(r1,r2) / d z1, r2 held fixed.
+  ! Step: max(1d-7, 1d-7*|r1(3)|).
+  implicit none
+  double precision, intent(in) :: r1(3), r2(3)
+  double precision, parameter  :: h_rel = 1d-7
+  double precision             :: r1_fwd(3), r1_bwd(3), step, f_fwd, f_bwd
+  double precision, external   :: j12_mu
+
+  step = max(h_rel, dabs(h_rel*r1(3)))
+
+  r1_fwd    = r1
+  r1_fwd(3) = r1_fwd(3) + step
+  r1_bwd    = r1_fwd
+  r1_bwd(3) = r1_bwd(3) - 2.d0 * step
+
+  f_fwd = j12_mu(r1_fwd, r2)
+  f_bwd = j12_mu(r1_bwd, r2)
+  grad1_z_j12_mu_num = 0.5d0 * (f_fwd - f_bwd) / step
+end function grad1_z_j12_mu_num
+
+! ---------------------------------------------------------------------------------------
+
+! ---
+
+subroutine grad1_j12_mu_exc(r1, r2, grad)
+
+  ! Closed-form expression grad(:) = 1/2 * (1 - erf(mu_erf * r12)) * (r1 - r2) / r12, with r12 = |r1 - r2|.
+  ! grad is set to zero when r12 < 1d-10, which avoids the division by r12.
+  implicit none
+  double precision, intent(in)  :: r1(3), r2(3)
+  double precision, intent(out) :: grad(3)
+  double precision              :: d12(3), dist, radial
+
+  d12(1) = r1(1) - r2(1)
+  d12(2) = r1(2) - r2(2)
+  d12(3) = r1(3) - r2(3)
+  dist   = dsqrt( d12(1) * d12(1) + d12(2) * d12(2) + d12(3) * d12(3) )
+
+  if(dist .lt. 1d-10) then
+    ! (nearly) coincident points
+    grad = 0.d0
+  else
+    radial  = 0.5d0 * (1.d0 - derf(mu_erf * dist)) / dist
+    grad(1) = radial * d12(1) 
+    grad(2) = radial * d12(2) 
+    grad(3) = radial * d12(3) 
+  endif
+end subroutine grad1_j12_mu_exc
+
+! ---
+
+subroutine grad1_jmu_modif_num(r1, r2, grad)
+
+  ! grad(:) = j1b_nucl(r1) * j1b_nucl(r2) * g12(:) + j12_mu(r1,r2) * j1b_nucl(r2) * g1b(:)
+  ! where g12 is returned by grad1_j12_mu_exc(r1,r2) and g1b by grad_{x,y,z}_j1b_nucl(r1).
+
+  implicit none
+  double precision, intent(in)  :: r1(3), r2(3)
+  double precision, intent(out) :: grad(3)
+  double precision              :: v_r1, v_r2, u12, v1_v2, u12_v2, du12(3)
+  double precision, external    :: j12_mu
+  double precision, external    :: j1b_nucl
+  double precision, external    :: grad_x_j1b_nucl
+  double precision, external    :: grad_y_j1b_nucl
+  double precision, external    :: grad_z_j1b_nucl
+
+  call grad1_j12_mu_exc(r1, r2, du12)
+
+  v_r1   = j1b_nucl(r1) 
+  v_r2   = j1b_nucl(r2)
+  u12    = j12_mu(r1, r2)
+  v1_v2  = v_r1 * v_r2
+  u12_v2 = u12 * v_r2
+
+  grad(1) = v1_v2 * du12(1) + u12_v2 * grad_x_j1b_nucl(r1)
+  grad(2) = v1_v2 * du12(2) + u12_v2 * grad_y_j1b_nucl(r1)
+  grad(3) = v1_v2 * du12(3) + u12_v2 * grad_z_j1b_nucl(r1)
+
+  return
+end subroutine grad1_jmu_modif_num
+
+! ---
+
+
+
+
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
new file mode 100644
index 00000000..854789bd
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -0,0 +1,360 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1) / r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                   (the second term is read from x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,:))
+  !
+  ! if J(r1,r2) = u12 x v1 x v2 (branch taken when j1b_type = 3):
+  !
+  ! int2_grad1_u12_ao(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1) / r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12                v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ip, i, j
+  double precision :: t_start, t_end
+  double precision :: rx, ry, rz, dv1_x, dv1_y, dv1_z, half_v1, erf_term, u_term
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(t_start)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    ! u12 multiplied by the one-body factors v1 and v2
+    do ip = 1, n_points_final_grid
+      rx = final_grid_points(1,ip)
+      ry = final_grid_points(2,ip)
+      rz = final_grid_points(3,ip)
+
+      half_v1 = 0.5d0 * v_1b(ip)
+      dv1_x   = v_1b_grad(1,ip)
+      dv1_y   = v_1b_grad(2,ip)
+      dv1_z   = v_1b_grad(3,ip)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          erf_term = half_v1 * v_ij_erf_rk_cst_mu_j1b(i,j,ip)
+          u_term   = v_ij_u_cst_mu_j1b(i,j,ip)
+
+          int2_grad1_u12_ao(i,j,ip,1) = erf_term * rx - half_v1 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ip,1) - u_term * dv1_x
+          int2_grad1_u12_ao(i,j,ip,2) = erf_term * ry - half_v1 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ip,2) - u_term * dv1_y
+          int2_grad1_u12_ao(i,j,ip,3) = erf_term * rz - half_v1 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ip,3) - u_term * dv1_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    ! bare u12: the overall factor 0.5 is applied element by element
+    ! (x_v_ij_erf_rk_cst_mu_transp_bis is indexed with the grid point first)
+    do ip = 1, n_points_final_grid
+      rx = final_grid_points(1,ip)
+      ry = final_grid_points(2,ip)
+      rz = final_grid_points(3,ip)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          erf_term = v_ij_erf_rk_cst_mu(i,j,ip)
+
+          int2_grad1_u12_ao(i,j,ip,1) = 0.5d0 * (erf_term * rx - x_v_ij_erf_rk_cst_mu_transp_bis(ip,i,j,1))
+          int2_grad1_u12_ao(i,j,ip,2) = 0.5d0 * (erf_term * ry - x_v_ij_erf_rk_cst_mu_transp_bis(ip,i,j,2))
+          int2_grad1_u12_ao(i,j,ip,3) = 0.5d0 * (erf_term * rz - x_v_ij_erf_rk_cst_mu_transp_bis(ip,i,j,3))
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(t_end)
+  print*, ' Wall time for int2_grad1_u12_ao = ', t_end - t_start
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r2 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1) / r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(i,j,ipoint,:)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2 (branch taken when j1b_type = 3):
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1) / r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12                v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  ! NOTE(review): expanding the first u12 line with r2 = r(ipoint) seems to give +0.5 * [ v * r(:) - x_v ]; the sign of the erf terms is left as written and should be confirmed.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+  else
+    ! the Cartesian index is FIRST here and LAST in int2_grad1_u12_ao: the shapes are not conformable, so permute explicitly
+    do ipoint = 1, n_points_final_grid
+      do j = 1, ao_num
+        do i = 1, ao_num
+          int1_grad2_u12_ao(1,i,j,ipoint) = -int2_grad1_u12_ao(i,j,ipoint,1)
+          int1_grad2_u12_ao(2,i,j,ipoint) = -int2_grad1_u12_ao(i,j,ipoint,2)
+          int1_grad2_u12_ao(3,i,j,ipoint) = -int2_grad1_u12_ao(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+  endif
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  ! Evaluated with explicit loops over the grid points; the (k,i,l,j) and (l,j,k,i) contributions are summed at the end.
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_loop ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+  ! ac_mat(k,i,l,j) = sum_ipoint 1/2 w(ipoint) [phi_k grad phi_i - phi_i grad phi_k](ipoint) . int2_grad1_u12_ao(l,j,ipoint,:)
+  ! ---
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      !ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      ao_i_r  = weight1 * aos_in_r_array     (i,ipoint)
+      ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3)
+
+      do k = 1, ao_num
+        !ao_k_r = aos_in_r_array_transp(ipoint,k)
+        ao_k_r = aos_in_r_array(k,ipoint)
+
+        !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) 
+        ! contract the three Cartesian components with int2_grad1_u12_ao(l,j,ipoint,:)
+        do j = 1, ao_num
+          do l = 1, ao_num
+
+            contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x 
+            contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y 
+            contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z 
+
+            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! ---
+  ! Disabled alternative: same accumulation with the roles of (k,i) and (l,j) exchanged (only user of ao_j_r, ao_l_r, ao_l_d*).
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
+  ! final result: ac_mat plus the same array with the index pairs (k,i) and (l,j) exchanged
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  ! Evaluated with one DGEMM per Cartesian component, contracting int2_grad1_u12_ao with b_mat over the grid points.
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao ...'
+  call wall_time(time0)
+
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ! b_mat(ipoint,k,i,m) = 1/2 w(ipoint) * [ phi_k d_m phi_i - phi_i d_m phi_k ](ipoint)
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) 
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) 
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! ac_mat(a,b,k,i) = sum_m sum_ipoint int2_grad1_u12_ao(a,b,ipoint,m) * b_mat(ipoint,k,i,m)   (beta = 1 accumulates over m)
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0              &
+              , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+  deallocate(b_mat)
+  ! final result: ac_mat plus the same array with the index pairs (k,i) and (l,j) exchanged
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..4d85e061
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,174 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)/r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)/r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12               v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  ! The u12 x v1 x v2 form is used when j1b_type = 3 (built from the *_j1b_test arrays); the plain u12 form otherwise.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+          ! NOTE(review): x_v_ij_erf_rk_cst_mu_tmp is indexed as (i,j,ipoint,xyz) here - confirm this matches its declared dimension order.
+          int2_grad1_u12_ao_test(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,1)
+          int2_grad1_u12_ao_test(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,2)
+          int2_grad1_u12_ao_test(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,3)
+        enddo
+      enddo
+    enddo
+    ! the 0.5 prefactor of the plain u12 form is applied once to the whole array
+    int2_grad1_u12_ao_test *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_test ...'
+  call wall_time(time0)
+
+  provide int2_grad1_u12_ao_test 
+ 
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ! b_mat(r,k,i,m) = 0.5 * weight(r) * [ phi_k(r) d_m phi_i(r) - phi_i(r) d_m phi_k(r) ],  m = x,y,z
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! ac_mat(a,b,c,d) = \sum_m \sum_r int2_grad1_u12_ao_test(a,b,r,m) * b_mat(r,c,d,m), accumulated over m by dgemm (beta = 1)
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                   &
+              , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num)
+
+  enddo
+  deallocate(b_mat)
+  ! final integral = ac_mat plus the same array with the index pairs (k,i) and (l,j) exchanged
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/numerical_integ.irp.f b/src/non_h_ints_mu/numerical_integ.irp.f
new file mode 100644
index 00000000..dcd7a52a
--- /dev/null
+++ b/src/non_h_ints_mu/numerical_integ.irp.f
@@ -0,0 +1,623 @@
+
+! --- 
+
+double precision function num_v_ij_u_cst_mu_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12 \phi_i(r2) \phi_j(r2) x v_1b(r2)
+  ! Numerical quadrature over the final grid with r1 = final_grid_points(:,ipoint); u12 is evaluated by j12_mu_gauss.
+  !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+
+  double precision, external :: ao_value
+  double precision, external :: j12_mu, j1b_nucl, j12_mu_gauss
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_v_ij_u_cst_mu_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    ! integrand at r2 multiplied by its quadrature weight
+    num_v_ij_u_cst_mu_j1b += ao_value(i, r2) * ao_value(j, r2) * j12_mu_gauss(r1, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+  enddo
+
+  return
+end function num_v_ij_u_cst_mu_j1b
+
+! ---
+
+double precision function num_int2_u2_j1b2(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12^2 \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  ! Numerical quadrature over the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint, i_fit
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, x2, tmp1, tmp2, tmp3, coef, expo
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_int2_u2_j1b2 = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    x2    = dx * dx + dy * dy + dz * dz 
+    r12   = dsqrt(x2)
+
+    tmp1 = j1b_nucl(r2)
+    tmp2 = tmp1 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+    ! tmp3 = j12_mu(r1,r2)^2; the commented-out loop is an alternative that builds tmp3 from a sum of Gaussians in x2
+    !tmp3 = 0.d0
+    !do i_fit = 1, n_max_fit_slat
+    !  expo = expo_gauss_j_mu_x_2(i_fit)
+    !  coef = coef_gauss_j_mu_x_2(i_fit)
+    !  tmp3 += coef * dexp(-expo*x2)
+    !enddo
+    tmp3 = j12_mu(r1, r2)
+    tmp3 = tmp3 * tmp3
+
+    num_int2_u2_j1b2 += tmp2 * tmp3
+  enddo
+
+  return
+end function num_int2_u2_j1b2
+
+! ---
+
+double precision function num_int2_grad1u2_grad2u2_j1b2(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 \frac{-[erf(mu r12) -1]^2}{4} \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  ! Numerical quadrature over the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint, i_fit
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, x2, tmp1, tmp2, tmp3, coef, expo
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_int2_grad1u2_grad2u2_j1b2 = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    x2    = dx * dx + dy * dy + dz * dz 
+    r12   = dsqrt(x2)
+
+    tmp1 = j1b_nucl(r2)
+    tmp2 = tmp1 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+    ! tmp3 = -0.25 * (erf(mu_erf*r12) - 1)^2; the commented-out loop is an alternative built from a sum of Gaussians in x2
+    !tmp3 = 0.d0
+    !do i_fit = 1, n_max_fit_slat
+    !  expo = expo_gauss_1_erf_x_2(i_fit)
+    !  coef = coef_gauss_1_erf_x_2(i_fit)
+    !  tmp3 += coef * dexp(-expo*x2)
+    !enddo
+    tmp3 = derf(mu_erf*r12) - 1.d0
+    tmp3 = tmp3 * tmp3
+
+    tmp3 = -0.25d0 * tmp3
+
+    num_int2_grad1u2_grad2u2_j1b2 += tmp2 * tmp3
+  enddo
+
+  return
+end function num_int2_grad1u2_grad2u2_j1b2
+
+! ---
+
+double precision function num_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [erf(mu r12) -1]/r12 \phi_i(r2) \phi_j(r2) x v_1b(r2)
+  ! Numerical quadrature over the final grid, r1 = final_grid_points(:,ipoint); grid points with r12 < 1d-10 are skipped.
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: dx, dy, dz, r12, tmp1, tmp2
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_v_ij_erf_rk_cst_mu_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+    ! the cycle above avoids the division by r12 on the next line when r2 coincides with r1
+    tmp1  = (derf(mu_erf * r12) - 1.d0) / r12
+    tmp2  = tmp1 * ao_value(i, r2) * ao_value(j, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+
+    num_v_ij_erf_rk_cst_mu_j1b += tmp2
+  enddo
+
+  return
+end function num_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine num_x_v_ij_erf_rk_cst_mu_j1b(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [erf(mu r12) -1]/r12 \phi_i(r2) \phi_j(r2) x v_1b(r2) x r2
+  ! The x, y, z components are returned in integ(1:3); grid points with r12 < 1d-10 are skipped.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: r1(3), r2(3), grad(3)
+  double precision              :: dx, dy, dz, r12, tmp1, tmp2
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+  double precision, external    :: j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+    ! the cycle above avoids the division by r12 on the next line when r2 coincides with r1
+    tmp1  = (derf(mu_erf * r12) - 1.d0) / r12
+    tmp2  = tmp1 * ao_value(i, r2) * ao_value(j, r2) * j1b_nucl(r2) * final_weight_at_r_vector(jpoint)
+
+    tmp_x += tmp2 * r2(1)
+    tmp_y += tmp2 * r2(2)
+    tmp_z += tmp2 * r2(3)
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_x_v_ij_erf_rk_cst_mu_j1b
+
+! ---
+
+subroutine num_int2_grad1_u12_ao(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 [-grad_1 u12] \phi_i(r2) \phi_j(r2) x v12_1b(r1, r2)
+  ! Quadrature over the final grid; the gradient at (r1, r2) is taken from grad1_jmu_modif_num and the x, y, z results go to integ(1:3).
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: tmp, r1(3), r2(3), grad(3)
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    tmp   = ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+    ! grad is filled by the call below and enters the sums with a minus sign
+    call grad1_jmu_modif_num(r1, r2, grad)
+
+    tmp_x += tmp * (-1.d0 * grad(1))
+    tmp_y += tmp * (-1.d0 * grad(2))
+    tmp_z += tmp * (-1.d0 * grad(3))
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_int2_grad1_u12_ao
+
+! ---
+
+double precision function num_gradu_squared_u_ij_mu(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int dr2 \phi_i(2) \phi_j(2) x v2^2
+  !      [  v1^2    ((grad_1 u12)^2 + (grad_2 u12)^2)
+  !      + u12^2         (grad_1 v1)^2 
+  !      + 2 u12 v1 (grad_1 u12) . (grad_1 v1) ]
+  ! Numerical quadrature over r2 on the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_gradu_squared_u_ij_mu = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+    ! NOTE(review): the three gradient calls below and j1b_nucl(r1) take only r1 - presumably hoistable out of the jpoint loop if the callees are pure.
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+    ! fst/scd/thd_term correspond, in order, to the three bracketed lines of the header formula
+    fst_term = 0.5d0 * tmp1 * tmp1 * v1_tmp * v1_tmp
+    scd_term = u12_tmp * u12_tmp * (dx1_v1*dx1_v1 + dy1_v1*dy1_v1 + dz1_v1*dz1_v1)
+    thd_term = 2.d0 * v1_tmp * u12_tmp * (dx1_v1*grad_u12(1) + dy1_v1*grad_u12(2) + dz1_v1*grad_u12(3))
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * (fst_term + scd_term + thd_term) * v2_tmp * v2_tmp
+
+    num_gradu_squared_u_ij_mu += tmp
+  enddo
+
+  return
+end function num_gradu_squared_u_ij_mu
+
+! ---
+
+double precision function num_grad12_j12(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int dr2 \phi_i(2) \phi_j(2) x v2^2 [ v1^2 ((grad_1 u12)^2 + (grad_2 u12)^2) ]
+  ! Numerical quadrature over r2 on the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_grad12_j12 = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+    ! NOTE(review): dx1_v1/dy1_v1/dz1_v1, grad_u12 and u12_tmp are assigned below but never read in this routine.
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    fst_term = 0.5d0 * tmp1 * tmp1 * v1_tmp * v1_tmp
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * fst_term * v2_tmp * v2_tmp
+
+    num_grad12_j12 += tmp
+  enddo
+
+  return
+end function num_grad12_j12
+
+! ---
+
+double precision function num_u12sq_j1bsq(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int dr2 \phi_i(2) \phi_j(2) x v2^2 [ u12^2 (grad_1 v1)^2 ]
+  ! Numerical quadrature over r2 on the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_u12sq_j1bsq = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+    ! NOTE(review): grad_u12, tmp1 and v1_tmp are assigned below but never read in this routine.
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    scd_term = u12_tmp * u12_tmp * (dx1_v1*dx1_v1 + dy1_v1*dy1_v1 + dz1_v1*dz1_v1)
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * scd_term * v2_tmp * v2_tmp
+
+    num_u12sq_j1bsq += tmp
+  enddo
+
+  return
+end function num_u12sq_j1bsq
+
+! ---
+
+double precision function num_u12_grad1_u12_j1b_grad1_j1b(i, j, ipoint)
+
+  BEGIN_DOC
+  !
+  ! -0.50 x \int dr2 \phi_i(2) \phi_j(2) x v2^2 [ 2 u12 v1 (grad_1 u12) . (grad_1 v1) ]
+  ! Numerical quadrature over r2 on the final grid with r1 = final_grid_points(:,ipoint).
+  END_DOC
+
+
+  implicit none
+
+  integer, intent(in)        :: i, j, ipoint
+
+  integer                    :: jpoint
+  double precision           :: r1(3), r2(3)
+  double precision           :: tmp_x, tmp_y, tmp_z, r12
+  double precision           :: dx1_v1, dy1_v1, dz1_v1, grad_u12(3)
+  double precision           :: tmp1, v1_tmp, v2_tmp, u12_tmp
+  double precision           :: fst_term, scd_term, thd_term, tmp
+
+  double precision, external :: ao_value
+  double precision, external :: j1b_nucl
+  double precision, external :: j12_mu
+  double precision, external :: grad_x_j1b_nucl
+  double precision, external :: grad_y_j1b_nucl
+  double precision, external :: grad_z_j1b_nucl
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  num_u12_grad1_u12_j1b_grad1_j1b = 0.d0
+  do jpoint = 1, n_points_final_grid
+
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+
+    tmp_x = r1(1) - r2(1)
+    tmp_y = r1(2) - r2(2)
+    tmp_z = r1(3) - r2(3)
+    r12   = dsqrt(tmp_x*tmp_x + tmp_y*tmp_y + tmp_z*tmp_z)
+    ! NOTE(review): tmp1 (and therefore r12) is computed below but never read in this routine.
+    dx1_v1 = grad_x_j1b_nucl(r1)
+    dy1_v1 = grad_y_j1b_nucl(r1)
+    dz1_v1 = grad_z_j1b_nucl(r1)
+
+    call grad1_j12_mu_exc(r1, r2, grad_u12)
+
+    tmp1    = 1.d0 - derf(mu_erf * r12)
+    v1_tmp  = j1b_nucl(r1)
+    v2_tmp  = j1b_nucl(r2)
+    u12_tmp = j12_mu(r1, r2)
+
+    thd_term = 2.d0 * v1_tmp * u12_tmp * (dx1_v1*grad_u12(1) + dy1_v1*grad_u12(2) + dz1_v1*grad_u12(3))
+
+    tmp = -0.5d0 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint) * thd_term * v2_tmp * v2_tmp
+
+    num_u12_grad1_u12_j1b_grad1_j1b += tmp
+  enddo
+
+  return
+end function num_u12_grad1_u12_j1b_grad1_j1b
+
+! ---
+
+subroutine num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
+
+  BEGIN_DOC
+  !
+  ! \int dr2 u12 (grad_1 u12) \phi_i(r2) \phi_j(r2) x v_1b(r2)^2
+  ! The x, y, z components are returned in integ(1:3); grid points with r12 < 1d-10 are skipped.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: i, j, ipoint
+  double precision, intent(out) :: integ(3)
+
+  integer                       :: jpoint
+  double precision              :: r1(3), r2(3), grad(3)
+  double precision              :: dx, dy, dz, r12, tmp0, tmp1, tmp2
+  double precision              :: tmp_x, tmp_y, tmp_z
+
+  double precision, external    :: ao_value
+  double precision, external    :: j1b_nucl
+  double precision, external    :: j12_mu
+
+  r1(1) = final_grid_points(1,ipoint)
+  r1(2) = final_grid_points(2,ipoint)
+  r1(3) = final_grid_points(3,ipoint)
+
+  tmp_x = 0.d0
+  tmp_y = 0.d0
+  tmp_z = 0.d0
+  do jpoint = 1, n_points_final_grid
+    r2(1) = final_grid_points(1,jpoint)
+    r2(2) = final_grid_points(2,jpoint)
+    r2(3) = final_grid_points(3,jpoint)
+    dx    = r1(1) - r2(1)
+    dy    = r1(2) - r2(2)
+    dz    = r1(3) - r2(3)
+    r12   = dsqrt( dx * dx + dy * dy + dz * dz )
+    if(r12 .lt. 1d-10) cycle
+    ! tmp1 = j12_mu(r1,r2) * 0.5 (1 - erf(mu_erf r12)) / r12; it is multiplied by (dx,dy,dz) in the sums below
+    tmp0 = j1b_nucl(r2)
+    tmp1 = 0.5d0 * j12_mu(r1, r2) * (1.d0 - derf(mu_erf * r12)) / r12
+    tmp2 = tmp0 * tmp0 * tmp1 * ao_value(i, r2) * ao_value(j, r2) * final_weight_at_r_vector(jpoint)
+
+    tmp_x += tmp2 * dx 
+    tmp_y += tmp2 * dy 
+    tmp_z += tmp2 * dz 
+  enddo
+
+  integ(1) = tmp_x
+  integ(2) = tmp_y
+  integ(3) = tmp_z
+
+  return
+end subroutine num_int2_u_grad1u_total_j1b2
+
+! ---
diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f
new file mode 100644
index 00000000..c535d0c5
--- /dev/null
+++ b/src/non_h_ints_mu/test_non_h_ints.irp.f
@@ -0,0 +1,102 @@
+program test_non_h
+ implicit none
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 50
+  my_n_pt_a_grid = 74
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!call routine_grad_squared ! alternative check; routine_lapl_grad is also defined in this file but not called here
+ call routine_fit
+end
+
+subroutine routine_lapl_grad
+ implicit none
+ integer :: i,j,k,l
+ double precision :: grad_lapl, get_ao_tc_sym_two_e_pot,new,accu,contrib
+ double precision :: ao_two_e_integral_erf,get_ao_two_e_integral,count_n,accu_relat
+! Compares tc_grad_and_lapl_ao + Coulomb with the map-based reference element by element and prints the averaged absolute/relative deviations.
+! WARNING: THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(1:n_max_fit_slat) = 0. to cancel (1-erf(mu*r12))^2
+ accu = 0.d0
+ accu_relat = 0.d0
+ count_n = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     grad_lapl  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
+     grad_lapl += ao_two_e_integral_erf(i, k, j, l)                        ! erf(mu r12)/r12    : comes from Lapl
+     grad_lapl += ao_non_hermit_term_chemist(k,i,l,j)                      ! \grad u(r12) . grad
+     new        = tc_grad_and_lapl_ao(k,i,l,j)
+     new       += get_ao_two_e_integral(i,j,k,l,ao_integrals_map)
+     contrib    = dabs(new - grad_lapl)
+     if(dabs(grad_lapl).gt.1.d-12)then
+      count_n += 1.d0
+      accu_relat += 2.0d0 * contrib/max(dabs(grad_lapl+new), 1.d-12) ! floor keeps the ratio finite when new ~ -grad_lapl
+     endif
+     if(contrib.gt.1.d-10)then
+      print*,i,j,k,l
+      print*,grad_lapl,new,contrib
+      print*,2.0d0*contrib/max(dabs(grad_lapl+new), 1.d-12)
+     endif 
+     accu += contrib
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu      = ',accu/max(count_n, 1.d0)       ! max() avoids 0/0 when no reference element exceeds 1.d-12
+ print*,'accu/rel  = ',accu_relat/max(count_n, 1.d0)
+
+end
+
+subroutine routine_grad_squared
+ implicit none
+ integer :: i,j,k,l
+ double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib
+ double precision :: count_n,accu_relat
+! Compares tc_grad_square_ao with the map-based reference element by element and prints the averaged absolute/relative deviations.
+! WARNING: THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(n_max_fit_slat:n_max_fit_slat+1) = 0. to cancel exp(-(mu*r12)^2)
+ accu = 0.d0
+ accu_relat = 0.d0
+ count_n = 0.d0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     grad_squared  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
+     new        = tc_grad_square_ao(k,i,l,j)
+     contrib    = dabs(new - grad_squared)
+     if(dabs(grad_squared).gt.1.d-12)then
+      count_n += 1.d0
+      accu_relat += 2.0d0 * contrib/max(dabs(grad_squared+new), 1.d-12) ! floor keeps the ratio finite when new ~ -grad_squared
+     endif
+     if(contrib.gt.1.d-10)then
+      print*,i,j,k,l
+      print*,grad_squared,new,contrib
+      print*,2.0d0*contrib/max(dabs(grad_squared+new), 1.d-12)
+     endif 
+     accu += contrib
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu      = ',accu/max(count_n, 1.d0)       ! max() avoids 0/0 when no reference element exceeds 1.d-12
+ print*,'accu/rel  = ',accu_relat/max(count_n, 1.d0)
+
+end
+
+subroutine routine_fit
+ ! Tabulates j_mu, j_mu_F_x_j and j_mu_fit_gauss at nx points x = 0, dx, ..., xmax-dx and writes one row per point to Fortran unit 33.
+ implicit none
+ integer :: i,nx
+ double precision :: dx,xmax,x,j_mu,j_mu_F_x_j,j_mu_fit_gauss
+ nx = 500
+ xmax = 5.d0
+ dx = xmax/dble(nx)
+ x = 0.d0
+ print*,'coucou',mu_erf
+ do i = 1, nx
+  write(33,'(100(F16.10,1X))') x,j_mu(x),j_mu_F_x_j(x),j_mu_fit_gauss(x) ! 1X: explicit count, the bare X is a legacy extension
+  x += dx
+ enddo
+end
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
new file mode 100644
index 00000000..81747553
--- /dev/null
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -0,0 +1,91 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao_num)]
+  BEGIN_DOC
+  ! ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+  END_DOC
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist ...'
+  call wall_time(wall0)
+  if(test_cycle_tc)then  ! when test_cycle_tc is set, copy the array built from the *_test providers instead
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+  BEGIN_DOC
+  ! ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+  END_DOC
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  print *, ' providing ao_tc_int_chemist_test ...'
+  call wall_time(wall0)
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! ao_two_e_coul(k,i,l,j) = ( k i | 1/r12 | l j ) = < l k | 1/r12 | j i > 
+  ! Dense array filled element by element from ao_integrals_map through get_ao_two_e_integral(i, j, k, l, ...).
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, k, l
+  double precision           :: integral
+  double precision, external :: get_ao_two_e_integral
+
+  PROVIDE ao_integrals_map
+
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          !  < 1:k, 2:l | 1:i, 2:j > 
+          integral = get_ao_two_e_integral(i, j, k, l, ao_integrals_map)
+
+          ao_two_e_coul(k,i,l,j) = integral
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
new file mode 100644
index 00000000..5d5477bc
--- /dev/null
+++ b/src/tc_keywords/EZFIO.cfg
@@ -0,0 +1,185 @@
+[read_rl_eigv]
+type: logical
+doc: If |true|, read the right/left eigenvectors from ezfio
+interface: ezfio,provider,ocaml
+default: False
+
+[comp_left_eigv]
+type: logical
+doc: If |true|, computes also the left-eigenvector 
+interface: ezfio,provider,ocaml
+default: False
+
+[three_body_h_tc]
+type: logical
+doc: If |true|, three-body terms are included
+interface: ezfio,provider,ocaml
+default: True 
+
+[pure_three_body_h_tc]
+type: logical
+doc: If |true|, pure triple excitation three-body terms are included
+interface: ezfio,provider,ocaml
+default: False
+
+[double_normal_ord]
+type: logical
+doc: If |true|, contracted double excitation three-body terms are included
+interface: ezfio,provider,ocaml
+default: False
+
+[core_tc_op]
+type: logical
+doc: If |true|, takes the usual Hamiltonian for core orbitals (assumed to be doubly occupied) 
+interface: ezfio,provider,ocaml
+default: False
+
+[full_tc_h_solver]
+type: logical
+doc: If |true|, you diagonalize the full TC H matrix 
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_it_dav]
+type: Threshold
+doc: Thresholds on the energy for iterative Davidson used in TC
+interface: ezfio,provider,ocaml
+default: 1.e-5
+
+[max_it_dav]
+type: integer
+doc: Maximum number of iterations in the Davidson algorithm used in TC
+interface: ezfio,provider,ocaml
+default: 1000
+
+[thresh_psi_r]
+type: Threshold
+doc: Thresholds on the coefficients of the right-eigenvector. Used for PT2 computation.
+interface: ezfio,provider,ocaml
+default: 0.000005
+
+[thresh_psi_r_norm]
+type: logical
+doc: If |true|, you prune the WF to compute the PT1 coef based on the norm. If False, the pruning is done through the amplitude on the right-coefficient. 
+interface: ezfio,provider,ocaml
+default: False
+
+[state_following_tc] 
+type: logical
+doc: If |true|, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[bi_ortho]
+type: logical
+doc: If |true|, the MO basis is assumed to be bi-orthonormal
+interface: ezfio,provider,ocaml
+default: True
+
+[symetric_fock_tc]
+type: logical
+doc: If |true|, using F+F^t as Fock TC 
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the Hartree Fock energy.
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[n_it_tcscf_max]
+type: Strictly_positive_int
+doc: Maximum number of SCF iterations
+interface: ezfio,provider,ocaml
+default: 100
+
+[j1b_pen]
+type: double precision
+doc: exponents of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_coeff]
+type: double precision
+doc: coeff of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_type]
+type: integer
+doc: type of 1-body Jastrow
+interface: ezfio, provider, ocaml
+default: 0
+
+[thr_degen_tc]
+type: Threshold
+doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[maxovl_tc]
+type: logical
+doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors 
+interface: ezfio,provider,ocaml
+default: False
+
+[ng_fit_jast]
+type: integer
+doc: nb of Gaussians used to fit Jastrow fcts
+interface: ezfio,provider,ocaml
+default: 20
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: Simple 
+
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: True
+
+[thresh_biorthog_diag]
+type: Threshold
+doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enough to 1
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_biorthog_nondiag]
+type: Threshold
+doc: Threshold to determine if non-diagonal elements of L.T x R are close enough to 0
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[im_thresh_tcscf]
+type: Threshold
+doc: Thresholds on the Imag part of energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
+[debug_tc_pt2]
+type: integer
+doc: If :: 1 then you compute the TC-PT2 the old way, :: 2 then you check with the new version but without three-body
+interface: ezfio,provider,ocaml
+default: -1
diff --git a/src/tc_keywords/NEED b/src/tc_keywords/NEED
new file mode 100644
index 00000000..f1c051ff
--- /dev/null
+++ b/src/tc_keywords/NEED
@@ -0,0 +1,2 @@
+ezfio_files
+nuclei
diff --git a/src/tc_keywords/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f
new file mode 100644
index 00000000..57250b52
--- /dev/null
+++ b/src/tc_keywords/j1b_pen.irp.f
@@ -0,0 +1,116 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ]
+
+  BEGIN_DOC
+  ! Exponents of the 1-body Jastrow (one per nucleus).
+  !
+  ! Read from the EZFIO entry tc_keywords/j1b_pen when it exists;
+  ! otherwise every exponent is set to 1d5.
+  END_DOC
+
+  implicit none
+  integer :: i
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  ! Only the master rank queries EZFIO: it either reads the stored values
+  ! or fills in the default. The flag exists is set and used on the master
+  ! rank only, so no other rank ever tests an undefined logical.
+  if (mpi_master) then
+
+    call ezfio_has_tc_keywords_j1b_pen(exists)
+
+    if (exists) then
+      write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen ] <<<<< ..'
+      call ezfio_get_tc_keywords_j1b_pen(j1b_pen)
+    else
+      do i = 1, nucl_num
+        j1b_pen(i) = 1d5
+      enddo
+    endif
+
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  ! Collective broadcast: executed by every rank, and only after the master
+  ! has filled j1b_pen, so that all ranks end up with the same values
+  ! (read from EZFIO or default).
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read j1b_pen with MPI'
+    endif
+  IRP_ENDIF
+
+  print*,'parameters for nuclei jastrow'
+  do i = 1, nucl_num
+    print*,'i,Z,j1b_pen(i)',i,nucl_charge(i),j1b_pen(i)
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ]
+
+  BEGIN_DOC
+  ! Coefficients of the 1-body Jastrow (one per nucleus).
+  !
+  ! Read from the EZFIO entry tc_keywords/j1b_coeff when it exists;
+  ! otherwise every coefficient is set to zero.
+  END_DOC
+
+  implicit none
+  integer :: i
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  ! Only the master rank queries EZFIO: it either reads the stored values
+  ! or fills in the default. The flag exists is set and used on the master
+  ! rank only, so no other rank ever tests an undefined logical.
+  if (mpi_master) then
+
+    call ezfio_has_tc_keywords_j1b_coeff(exists)
+
+    if (exists) then
+      write(6,'(A)') '.. >>>>> [ IO READ: j1b_coeff ] <<<<< ..'
+      call ezfio_get_tc_keywords_j1b_coeff(j1b_coeff)
+    else
+      ! Default when nothing is stored in EZFIO.
+      do i = 1, nucl_num
+        j1b_coeff(i) = 0.d0
+      enddo
+    endif
+
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  ! Collective broadcast: executed by every rank, and only after the master
+  ! has filled j1b_coeff, so that all ranks end up with the same values
+  ! (read from EZFIO or default).
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST(j1b_coeff, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read j1b_coeff with MPI'
+    endif
+  IRP_ENDIF
+
+END_PROVIDER
+
+! ---
diff --git a/src/tc_keywords/tc_keywords.irp.f b/src/tc_keywords/tc_keywords.irp.f
new file mode 100644
index 00000000..3bc68550
--- /dev/null
+++ b/src/tc_keywords/tc_keywords.irp.f
@@ -0,0 +1,7 @@
+program tc_keywords
+  BEGIN_DOC
+! Placeholder main program of the tc_keywords module: it only prints a greeting.
+  END_DOC
+  implicit none
+  write(*,*) 'Hello world'
+end
diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f
index 38e198dc..15d79622 100644
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@@ -129,6 +129,106 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
 
 end
 
+subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, LD_A, B_center, n_points)
+
+  BEGIN_DOC
+  ! Transforms the product of
+  !          (x-x_A)^a(1) (x-x_B)^b(1) (y-y_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta)
+  ! into
+  !        fact_k * [ sum (l_x = 0,i_order(1)) P_new(l_x,1) * (x-P_center(1))^l_x ] exp (- p (x-P_center(1))^2 )
+  !               * [ sum (l_y = 0,i_order(2)) P_new(l_y,2) * (y-P_center(2))^l_y ] exp (- p (y-P_center(2))^2 )
+  !               * [ sum (l_z = 0,i_order(3)) P_new(l_z,3) * (z-P_center(3))^l_z ] exp (- p (z-P_center(3))^2 )
+  !
+  ! Vectorized over n_points centers A (rows of A_center); B is a single center.
+  ! NOTE: where gaussian_product_v finds a negligible prefactor it returns
+  ! fact_k = 0 and P_center = 0 for that point; the polynomial is still built.
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer,          intent(in)  :: n_points, ldp, LD_A
+  integer,          intent(in)  :: a(3), b(3)              ! powers : (x-xa)**a_x = (x-A(1))**a(1)
+  double precision, intent(in)  :: alpha, beta             ! exponents
+  double precision, intent(in)  :: A_center(LD_A,3)        ! A center
+  double precision, intent(in)  :: B_center(3)             ! B center
+  integer,          intent(out) :: iorder(3)               ! i_order(i) = order of the polynomials
+  double precision, intent(out) :: P_center(n_points,3)    ! new center
+  double precision, intent(out) :: p                       ! new exponent
+  double precision, intent(out) :: fact_k(n_points)        ! constant factor
+  double precision, intent(out) :: P_new(n_points,0:ldp,3) ! polynomial
+
+  integer                       :: n_new, i, j, ipoint, lda, ldb, xyz
+  double precision, allocatable :: P_a(:,:,:), P_b(:,:,:)
+
+  ! Product Gaussian: new exponent p, and per point the center and prefactor.
+  call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points)
+
+  if(ior(ior(b(1), b(2)), b(3)) == 0) then  ! b == (0,0,0)
+
+    iorder(1:3) = a(1:3)
+
+    lda = maxval(a)
+    allocate(P_a(n_points,0:lda,3))
+    !ldb = 0
+    !allocate(P_b(n_points,0:0,3))
+
+    !call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points)
+    call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, n_points)
+
+    do ipoint = 1, n_points
+      do xyz = 1, 3
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
+        do i = 1, a(xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
+        enddo
+      enddo
+    enddo
+
+    deallocate(P_a)
+    !deallocate(P_b)
+
+    return
+  endif
+
+  lda = maxval(a)
+  ldb = maxval(b)
+  allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3))
+
+  call recentered_poly2_v(P_a, lda, A_center, LD_A, P_center, a, P_b, ldb, B_center, P_center, b, n_points)
+
+  iorder(1:3) = a(1:3) + b(1:3)
+
+  do xyz = 1, 3
+    if(b(xyz) == 0) then
+      ! No B polynomial in this direction: copy the recentered A polynomial.
+      do ipoint = 1, n_points
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
+        do i = 1, a(xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
+        enddo
+      enddo
+
+    else
+      ! multiply_poly_v accumulates into its output, so clear it first.
+      do i = 0, iorder(xyz)
+        do ipoint = 1, n_points
+          P_new(ipoint,i,xyz) = 0.d0
+        enddo
+      enddo
+
+      call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points)
+
+    endif
+  enddo
+
+end subroutine give_explicit_poly_and_gaussian_v
+
+! ---
 
 subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim)
   BEGIN_DOC
@@ -232,6 +332,64 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp)
 end subroutine
 
 
+subroutine gaussian_product_v(a, xa, LD_xa, b, xb, k, p, xp, n_points)
+
+  BEGIN_DOC
+  !
+  ! Gaussian product, written here for one Cartesian direction:
+  ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2}
+  !
+  ! Using multiple A centers: k(ipoint) and xp(ipoint,1:3) are computed for each row of xa.
+  ! When a*b/(a+b) |A-B|^2 > 40, k and xp are set to zero for that point.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: LD_xa, n_points
+  double precision, intent(in)  :: a, b                  ! Exponents
+  double precision, intent(in)  :: xa(LD_xa,3), xb(3)    ! Centers
+  double precision, intent(out) :: p                     ! New exponent
+  double precision, intent(out) :: xp(n_points,3)        ! New center
+  double precision, intent(out) :: k(n_points)           ! Constant
+
+  integer                       :: ipoint
+  double precision              :: p_inv
+  double precision              :: xab(3), ab, ap, bp, bpxb(3)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab
+
+  ASSERT (a>0.)
+  ASSERT (b>0.)
+
+  p = a+b
+  p_inv = 1.d0/(a+b)
+  ab = a*b*p_inv
+  ap = a*p_inv
+  bp = b*p_inv
+  bpxb(1) = bp*xb(1)
+  bpxb(2) = bp*xb(2)
+  bpxb(3) = bp*xb(3)
+
+  do ipoint = 1, n_points
+    xab(1) = xa(ipoint,1)-xb(1)
+    xab(2) = xa(ipoint,2)-xb(2)
+    xab(3) = xa(ipoint,3)-xb(3)
+    k(ipoint) = ab*(xab(1)*xab(1)+xab(2)*xab(2)+xab(3)*xab(3))
+    if (k(ipoint) > 40.d0) then
+      k(ipoint)=0.d0
+      xp(ipoint,1) = 0.d0
+      xp(ipoint,2) = 0.d0
+      xp(ipoint,3) = 0.d0
+    else
+      k(ipoint) = dexp(-k(ipoint))
+      xp(ipoint,1) = ap*xa(ipoint,1)+bpxb(1)
+      xp(ipoint,2) = ap*xa(ipoint,2)+bpxb(2)
+      xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3)
+    endif
+  enddo
+
+end subroutine gaussian_product_v
+
+! ---
 
 
 subroutine gaussian_product_x(a,xa,b,xb,k,p,xp)
@@ -313,6 +471,43 @@ subroutine multiply_poly(b,nb,c,nc,d,nd)
 
 end
 
+subroutine multiply_poly_v(b,nb,c,nc,d,nd,n_points)
+  implicit none
+  BEGIN_DOC
+  ! Accumulate products of polynomials, independently for each point:
+  ! D(t) += B(t)*C(t)
+  !
+  ! b, c : coefficients (point, degree) of B and C, degrees 0..nb and 0..nc
+  ! d    : coefficients of D, degrees 0..nd, updated in place
+  ! Stops the program if nd < nb+nc.
+  END_DOC
+
+  integer, intent(in)            :: nb, nc, n_points
+  integer, intent(in)            :: nd
+  double precision, intent(in)   :: b(n_points,0:nb), c(n_points,0:nc)
+  double precision, intent(inout) :: d(n_points,0:nd)
+
+  integer                        :: deg_b, deg_c
+  if (nd < nb+nc) then
+     print *, nd,  nb, nc
+     print *, irp_here, ': nd < nb+nc'
+     stop 1
+  endif
+
+  ! Terms coming from the constant coefficient of B.
+  do deg_c = 0, nc
+    d(1:n_points,deg_c) = d(1:n_points,deg_c) + c(1:n_points,deg_c) * b(1:n_points,0)
+  enddo
+
+  ! Remaining terms, one degree of B at a time.
+  do deg_b = 1, nb
+    d(1:n_points,deg_b) = d(1:n_points,deg_b) + c(1:n_points,0) * b(1:n_points,deg_b)
+    do deg_c = 1, nc
+      d(1:n_points,deg_b+deg_c) = d(1:n_points,deg_b+deg_c) + c(1:n_points,deg_c) * b(1:n_points,deg_b)
+    enddo
+  enddo
+end
+
 subroutine add_poly(b,nb,c,nc,d,nd)
   implicit none
   BEGIN_DOC
@@ -369,6 +564,152 @@ subroutine add_poly_multiply(b,nb,cst,d,nd)
 end
 
 
+subroutine recentered_poly2_v(P_new, lda, x_A, LD_xA, x_P, a, P_new2, ldb, x_B, x_Q, b, n_points)
+
+  BEGIN_DOC
+  ! Recenter two polynomials: per point, P_new holds (x-A)^a expanded around x_P and P_new2 holds (x-B)^b expanded around x_Q
+  END_DOC
+  ! x_A holds one center per point (rows); x_B is a single center.
+  implicit none
+  integer, intent(in)            :: a(3), b(3), n_points, lda, ldb, LD_xA
+  double precision, intent(in)   :: x_A(LD_xA,3), x_P(n_points,3), x_B(3), x_Q(n_points,3)
+  double precision, intent(out)  :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3)
+  double precision               :: binom_func
+  integer                        :: i,j,k,l, minab(3), maxab(3),ipoint, xyz
+  double precision, allocatable  :: pows_a(:,:), pows_b(:,:)
+  double precision :: fa, fb
+  ! Per direction: larger power, and smaller power clamped to be non-negative.
+  maxab(1:3) = max(a(1:3),b(1:3))
+  minab(1:3) = max(min(a(1:3),b(1:3)),(/0,0,0/))
+  ! pows_a(:,n) = (x_P - x_A)**n and pows_b(:,n) = (x_Q - x_B)**n, rebuilt for each direction.
+  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+
+  do xyz=1,3
+    if ((a(xyz)<0).or.(b(xyz)<0) ) cycle  ! nothing is written for a direction with a negative power
+    do ipoint=1,n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      pows_b(ipoint,0) = 1.d0
+      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+    do i =  2,maxab(xyz)
+      do ipoint=1,n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
+        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+      enddo
+    enddo
+    do ipoint=1,n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      P_new2(ipoint,0,xyz) =  pows_b(ipoint,b(xyz))
+    enddo
+    do i =  1,min(minab(xyz),20)  ! orders shared by both polynomials; binom_transp is presumably a precomputed table
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(a(xyz),20)  ! remaining orders of the A polynomial
+      fa =  binom_transp(a(xyz)-i,a(xyz))
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  minab(xyz)+1,min(b(xyz),20)  ! remaining orders of the B polynomial
+      fb =  binom_transp(b(xyz)-i,b(xyz))
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+    do i =  21,a(xyz)  ! orders above 20 use binom_func instead of binom_transp
+      fa =  binom_func(a(xyz),a(xyz)-i)
+      do ipoint=1,n_points
+        P_new (ipoint,i,xyz) =  fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i =  21,b(xyz)
+      fb = binom_func(b(xyz),b(xyz)-i)
+      do ipoint=1,n_points
+        P_new2(ipoint,i,xyz) =  fb * pows_b(ipoint,b(xyz)-i)
+      enddo
+    enddo
+  enddo
+
+end subroutine recentered_poly2_v
+
+! ---
+
+subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points)
+
+  BEGIN_DOC
+  ! 
+  ! Recenter two polynomials. Special case for b=(0,0,0)
+  ! 
+  ! (x - A)^a (x - B)^0 = (x - P + P - A)^a  (x - Q + Q - B)^0
+  !                     = (x - P + P - A)^a 
+  ! Only the A polynomial is returned, in P_new(point, order, direction).
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a(3), n_points, lda, LD_xA
+  double precision, intent(in)  :: x_A(LD_xA,3), x_P(n_points,3)
+  !double precision, intent(in)  :: x_B(3), x_Q(n_points,3)
+  double precision, intent(out) :: P_new(n_points,0:lda,3)
+  !double precision, intent(out) :: P_new2(n_points,3)
+
+  integer                       :: i, j, k, l, xyz, ipoint, maxab(3)
+  double precision              :: fa
+  double precision, allocatable :: pows_a(:,:)
+  !double precision, allocatable :: pows_b(:,:)
+
+  double precision              :: binom_func
+  ! Powers clamped to be non-negative; used to size and fill the work array.
+  maxab(1:3) = max(a(1:3), (/0,0,0/))
+
+  allocate(pows_a(n_points,-2:maxval(maxab)+4))
+  !allocate(pows_b(n_points,-2:maxval(maxab)+4))
+
+  do xyz = 1, 3
+    if(a(xyz) < 0) cycle
+    ! pows_a(:,n) = (x_P - x_A)**n for the current direction
+    do ipoint = 1, n_points
+      pows_a(ipoint,0) = 1.d0
+      pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
+      !pows_b(ipoint,0) = 1.d0
+      !pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+    enddo
+
+    do i = 2, maxab(xyz)
+      do ipoint = 1, n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1) * pows_a(ipoint,1)
+        !pows_b(ipoint,i) = pows_b(ipoint,i-1) * pows_b(ipoint,1)
+      enddo
+    enddo
+    ! Order i coefficient = binomial factor * (x_P - x_A)**(a-i)
+    do ipoint = 1, n_points
+      P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
+      !P_new2(ipoint,xyz)   =  pows_b(ipoint,0)
+    enddo
+    do i = 1, min(a(xyz), 20)
+      fa = binom_transp(a(xyz)-i, a(xyz))
+      do ipoint = 1, n_points
+        P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+    do i = 21, a(xyz)  ! orders above 20 use binom_func instead of binom_transp
+      fa = binom_func(a(xyz), a(xyz)-i)
+      do ipoint = 1, n_points
+        P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i)
+      enddo
+    enddo
+
+  enddo !xyz
+
+  deallocate(pows_a)
+  !deallocate(pows_b)
+
+end subroutine recentered_poly2_v0
 
 subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
   implicit none
@@ -412,6 +753,79 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b)
   enddo
 end
 
+subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)
+
+  BEGIN_DOC
+  !
+  ! Re-expand a separable polynomial centered on A,
+  !       [ \sum_i ax_i (x-x_A)^i ] [ \sum_j ay_j (y-y_A)^j ] [ \sum_k az_k (z-z_A)^k ]
+  ! as a polynomial centered on B,
+  !       [ \sum_i bx_i (x-x_B)^i ] [ \sum_j by_j (y-y_B)^j ] [ \sum_k bz_k (z-z_B)^k ]
+  ! by calling pol_modif_center_x once per Cartesian direction.
+  END_DOC
+
+  ! provides max_dim
+  include 'constants.include.F'
+
+  implicit none
+
+  double precision, intent(in)  :: A_center(3), B_center(3)
+  integer,          intent(in)  :: iorder(3)
+  double precision, intent(in)  :: A_pol(0:max_dim, 3)
+  double precision, intent(out) :: B_pol(0:max_dim, 3)
+
+  integer                       :: xyz
+
+  ! Only coefficients 0..iorder(xyz) of each column are read and written.
+  do xyz = 1, 3
+    call pol_modif_center_x( A_center(xyz), B_center(xyz), iorder(xyz), &
+                             A_pol(0:iorder(xyz), xyz), B_pol(0:iorder(xyz), xyz) )
+  enddo
+
+end subroutine pol_modif_center
+
+
+
+subroutine pol_modif_center_x(A_center, B_center, iorder, A_pol, B_pol)
+
+  BEGIN_DOC
+  !
+  ! Transform the polynomial of degree iorder centered on A:
+  !       [ \sum_i ax_i (x-x_A)^i ]
+  ! to a polynomial centered on B
+  !       [ \sum_i bx_i (x-x_B)^i ]
+  !
+  ! bx_i = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) j! / [ i! (j-i)! ]
+  !      = \sum_{j=i}^{iorder} ax_j (x_B - x_A)^(j-i) binom_func(j,i)
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: iorder
+  double precision, intent(in)  :: A_center, B_center
+  double precision, intent(in)  :: A_pol(0:iorder)
+  double precision, intent(out) :: B_pol(0:iorder)
+
+  integer                       :: i, j
+  double precision              :: fact_tmp, dx
+
+  double precision              :: binom_func
+
+  dx = B_center - A_center
+  ! dx may be negative: use an integer exponent, since a negative real base with a real exponent is prohibited.
+  do i = 0, iorder
+    fact_tmp = 0.d0
+    do j = i, iorder
+      fact_tmp += A_pol(j) * binom_func(j, i) * dx**(j-i)
+    enddo
+    B_pol(i) = fact_tmp
+  enddo
+
+  return
+end subroutine pol_modif_center_x
+
+
 
 
 
diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f
index cacc3bf7..081adee3 100644
--- a/src/utils/one_e_integration.irp.f
+++ b/src/utils/one_e_integration.irp.f
@@ -145,3 +145,72 @@ end
 
 
 
+subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points)
+
+  BEGIN_DOC
+  !.. math::
+  !
+  !   S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-\beta(x-B_x)^2) dx \\
+  !   S = S_x S_y S_z
+  !
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+
+  integer,          intent(in)  :: n_points
+  integer,          intent(in)  :: power_A(3), power_B(3)             ! power of the x1 functions
+  double precision, intent(in)  :: A_center(n_points,3), B_center(3)  ! center of the x1 functions
+  double precision, intent(in)  :: alpha, beta
+  double precision, intent(out) :: overlap(n_points)
+
+  integer                       :: i
+  integer                       :: iorder_p(3), ipoint, ldp
+  integer                       :: nmax
+  double precision              :: F_integral_tab(0:max_dim)
+  double precision              :: p, overlap_x, overlap_y, overlap_z
+  double precision              :: F_integral
+  double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:)
+  ! Largest polynomial order that the product can reach in any direction.
+  ldp = maxval(power_A(1:3) + power_B(1:3))
+
+  allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points))
+  ! A_center is passed with leading dimension n_points (one center per point).
+  call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, n_points, B_center, n_points)
+  ! F_integral(i,p) depends only on the order and on p, so it is tabulated once for all points.
+  nmax = maxval(iorder_p)
+  do i = 0, nmax
+    F_integral_tab(i) = F_integral(i,p)
+  enddo
+
+  do ipoint = 1, n_points
+
+    if(fact_p(ipoint) .lt. 1d-20) then
+      overlap(ipoint) = 1.d-10  ! negligible prefactor: a fixed small value is returned, not zero
+      cycle
+    endif
+
+    overlap_x = P_new(ipoint,0,1) * F_integral_tab(0)
+    do i = 1, iorder_p(1)
+      overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i)
+    enddo
+
+    overlap_y = P_new(ipoint,0,2) * F_integral_tab(0)
+    do i = 1, iorder_p(2)
+      overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i)
+    enddo
+
+    overlap_z = P_new(ipoint,0,3) * F_integral_tab(0)
+    do i = 1, iorder_p(3)
+      overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i)
+    enddo
+
+    overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_p(ipoint)
+  enddo
+
+  deallocate(P_new, P_center, fact_p)
+
+end subroutine overlap_gaussian_xyz_v
+
+! ---

From 3a68b365153328c050d476fbb51754c7c7a432c4 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 6 Feb 2023 19:02:19 +0100
Subject: [PATCH 3/7] added three_body_ints

---
 src/three_body_ints/EZFIO.cfg               |  20 +
 src/three_body_ints/NEED                    |   1 +
 src/three_body_ints/io_6_index_tensor.irp.f |  63 +++
 src/three_body_ints/semi_num_ints_mo.irp.f  | 207 +++++++++
 src/three_body_ints/three_body_tensor.irp.f | 106 +++++
 src/three_body_ints/three_e_3_idx.irp.f     | 338 +++++++++++++++
 src/three_body_ints/three_e_4_idx.irp.f     | 347 +++++++++++++++
 src/three_body_ints/three_e_5_idx.irp.f     | 453 ++++++++++++++++++++
 8 files changed, 1535 insertions(+)
 create mode 100644 src/three_body_ints/EZFIO.cfg
 create mode 100644 src/three_body_ints/NEED
 create mode 100644 src/three_body_ints/io_6_index_tensor.irp.f
 create mode 100644 src/three_body_ints/semi_num_ints_mo.irp.f
 create mode 100644 src/three_body_ints/three_body_tensor.irp.f
 create mode 100644 src/three_body_ints/three_e_3_idx.irp.f
 create mode 100644 src/three_body_ints/three_e_4_idx.irp.f
 create mode 100644 src/three_body_ints/three_e_5_idx.irp.f

diff --git a/src/three_body_ints/EZFIO.cfg b/src/three_body_ints/EZFIO.cfg
new file mode 100644
index 00000000..9624c161
--- /dev/null
+++ b/src/three_body_ints/EZFIO.cfg
@@ -0,0 +1,20 @@
+[io_three_body_ints]
+type: Disk_access
+doc: Read/Write the 6 index tensor three-body terms from/to disk [ Write | Read | None ]
+interface: ezfio,provider,ocaml
+default: None
+  
+[symm_3_body_tensor]
+type: logical
+doc: If |true|, you have a symmetrized two body tensor
+interface: ezfio,provider,ocaml
+default: False
+
+  
+[read_3_body_tc_ints]
+type: logical
+doc: If |true|, you read the 3 body integrals from an FCIDUMP like file
+interface: ezfio,provider,ocaml
+default: False
+
+  
diff --git a/src/three_body_ints/NEED b/src/three_body_ints/NEED
new file mode 100644
index 00000000..ad7b6bf8
--- /dev/null
+++ b/src/three_body_ints/NEED
@@ -0,0 +1 @@
+bi_ort_ints
diff --git a/src/three_body_ints/io_6_index_tensor.irp.f b/src/three_body_ints/io_6_index_tensor.irp.f
new file mode 100644
index 00000000..dd654f7e
--- /dev/null
+++ b/src/three_body_ints/io_6_index_tensor.irp.f
@@ -0,0 +1,63 @@
+
+subroutine write_array_6_index_tensor(n_orb,array_tmp,name_file)
+ ! Dump array_tmp with an unformatted write to <ezfio_filename>/work/<name_file>.
+ implicit none
+ integer,          intent(in) :: n_orb
+ double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb,n_orb)
+ character*(128),  intent(in) :: name_file
+ integer                      :: iunit, getUnitAndOpen
+ character*(128)              :: path
+ PROVIDE ezfio_filename
+ path  = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(path,'W')
+ write(iunit) array_tmp
+ close(iunit)
+end
+
+subroutine read_array_6_index_tensor(n_orb,array_tmp,name_file)
+ implicit none  ! fills array_tmp from <ezfio_filename>/work/<name_file>
+ integer,          intent(in)  :: n_orb
+ double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb,n_orb)
+ character*(128),  intent(in)  :: name_file
+ integer                       :: iunit, getUnitAndOpen
+ character*(128)               :: path
+ PROVIDE ezfio_filename
+ path  = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(path,'R')
+ read(iunit) array_tmp  ! unformatted read of the whole tensor
+ close(iunit)
+end
+
+subroutine read_fcidump_3_tc(array)
+ ! Read the three-body TC integrals from the text file TCDUMP-nosym (current
+ ! directory): first the number of MOs, then records value,i,j,m,k,l,n.
+ implicit none
+ double precision, intent(out) :: array(mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)
+ integer :: i,j,k,l,m,n,i_mo, Reason
+ double precision :: integral 
+ print*,'Reading the THREE-body integrals from a TC FCIDUMP'
+ open (unit=15, file="TCDUMP-nosym", status='old',    &
+              access='sequential', action='read' )
+ read(15,*)i_mo
+ if(i_mo.ne.mo_num)then
+  print*,'Something went wrong in the read_fcidump_3_tc !'
+  print*,'i_mo.ne.mo_num !'
+  print*,i_mo,mo_num
+  stop
+ endif
+ array = 0.d0  ! intent(out): define every element, not only those listed in the file
+ do 
+  read(15,*,IOSTAT=Reason)integral,i, j, m, k, l, n
+  if(Reason > 0)then
+   print*,'Something went wrong in the I/O of read_fcidump_3_tc'
+   stop
+  else if(Reason < 0)then
+   exit   ! end of file
+  else
+   !  <i j m|k l n> with electrons 1 2 3 | 1 2 3, i.e. (ik|jl|mn).
+   !  NOTE(review): the factor 3 presumably undoes a NECI-style 1/3 -- confirm.
+   array(i,j,m,k,l,n) =  integral * 3.d0
+  endif
+ enddo
+ close(15)  ! release the unit once the whole file has been read
+end
diff --git a/src/three_body_ints/semi_num_ints_mo.irp.f b/src/three_body_ints/semi_num_ints_mo.irp.f
new file mode 100644
index 00000000..831ceb9b
--- /dev/null
+++ b/src/three_body_ints/semi_num_ints_mo.irp.f
@@ -0,0 +1,207 @@
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! Explicit-loop AO->MO transformation of 0.5 * v_ij_erf_rk_cst_mu: int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1 )/(2|r - R|) on the MO basis
+ END_DOC
+ integer :: ipt, iorb, jorb, kao, lao
+ mo_v_ij_erf_rk_cst_mu_naive = 0.d0
+ do ipt = 1, n_points_final_grid
+  do iorb = 1, mo_num
+   do jorb = 1, mo_num
+    do kao = 1, ao_num
+     do lao = 1, ao_num
+      mo_v_ij_erf_rk_cst_mu_naive(jorb,iorb,ipt) += mo_coef(lao,jorb) * 0.5d0 * v_ij_erf_rk_cst_mu(lao,kao,ipt) * mo_coef(kao,iorb)
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! v_ij_erf_rk_cst_mu passed through ao_to_mo at every grid point, then scaled by 0.5: int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the MO basis
+ END_DOC
+ integer :: ipt
+ !$OMP PARALLEL                                                            &
+ !$OMP DEFAULT (NONE)                                                      &
+ !$OMP PRIVATE (ipt)                                                       &
+ !$OMP SHARED (mo_v_ij_erf_rk_cst_mu,v_ij_erf_rk_cst_mu,n_points_final_grid)
+ !$OMP DO SCHEDULE (dynamic)
+ do ipt = 1, n_points_final_grid
+   call ao_to_mo(v_ij_erf_rk_cst_mu(1,1,ipt),size(v_ij_erf_rk_cst_mu,1),mo_v_ij_erf_rk_cst_mu(1,1,ipt),size(mo_v_ij_erf_rk_cst_mu,1))
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ mo_v_ij_erf_rk_cst_mu = 0.5d0 * mo_v_ij_erf_rk_cst_mu
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_transp, ( n_points_final_grid,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Copy of mo_v_ij_erf_rk_cst_mu with the grid index moved first: (ipoint,j,i) <- (j,i,ipoint). The source array is freed afterwards.
+ END_DOC
+ integer :: ipt, iorb, jorb
+ do iorb = 1, mo_num
+  do jorb = 1, mo_num
+   do ipt = 1, n_points_final_grid
+    mo_v_ij_erf_rk_cst_mu_transp(ipt,jorb,iorb) = mo_v_ij_erf_rk_cst_mu(jorb,iorb,ipt)
+   enddo
+  enddo
+ enddo
+ FREE mo_v_ij_erf_rk_cst_mu
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_num,3,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! Explicit-loop AO->MO transformation of 0.5 * x_v_ij_erf_rk_cst_mu_transp for each of the 3 components and each grid point
+ END_DOC
+ integer :: ipt, idir, iorb, jorb, kao, lao
+ mo_x_v_ij_erf_rk_cst_mu_naive = 0.d0
+ do ipt = 1, n_points_final_grid
+  do iorb = 1, mo_num
+   do jorb = 1, mo_num
+    do idir = 1, 3
+     do kao = 1, ao_num
+      do lao = 1, ao_num
+       mo_x_v_ij_erf_rk_cst_mu_naive(jorb,iorb,idir,ipt) += mo_coef(lao,jorb) * 0.5d0 * x_v_ij_erf_rk_cst_mu_transp(lao,kao,idir,ipt) * mo_coef(kao,iorb)
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,3,n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/2|r - R| on the MO basis
+! Built by calling ao_to_mo on x_v_ij_erf_rk_cst_mu_transp for each component and grid point, then scaling by 0.5
+ END_DOC
+ integer :: ipt, idir
+ !$OMP PARALLEL                                                                        &
+ !$OMP DEFAULT (NONE)                                                                  &
+ !$OMP PRIVATE (ipt,idir)                                                              &
+ !$OMP SHARED (mo_x_v_ij_erf_rk_cst_mu,x_v_ij_erf_rk_cst_mu_transp,n_points_final_grid)
+ !$OMP DO SCHEDULE (dynamic)
+ do ipt = 1, n_points_final_grid
+  do idir = 1, 3
+   call ao_to_mo(x_v_ij_erf_rk_cst_mu_transp(1,1,idir,ipt),size(x_v_ij_erf_rk_cst_mu_transp,1),mo_x_v_ij_erf_rk_cst_mu(1,1,idir,ipt),size(mo_x_v_ij_erf_rk_cst_mu,1))
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ mo_x_v_ij_erf_rk_cst_mu = mo_x_v_ij_erf_rk_cst_mu * 0.5d0
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu_transp, (n_points_final_grid,3, mo_num, mo_num)]
+ implicit none
+ ! Re-indexed copy of mo_x_v_ij_erf_rk_cst_mu: (ipoint,m,j,i) <- (j,i,m,ipoint)
+ integer :: ipt, idir, iorb, jorb
+ do iorb = 1, mo_num
+  do jorb = 1, mo_num
+   do idir = 1, 3
+    do ipt = 1, n_points_final_grid
+     mo_x_v_ij_erf_rk_cst_mu_transp(ipt,idir,jorb,iorb) = mo_x_v_ij_erf_rk_cst_mu(jorb,iorb,idir,ipt)
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, x_W_ij_erf_rk, ( n_points_final_grid,3,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! W_mn^X(R) = \int dr phi_m(r) phi_n(r) (1 - erf(mu |r-R|)) (x-X)
+! Evaluated here as mo_x_v_ij_erf_rk_cst_mu_transp - X * mo_v_ij_erf_rk_cst_mu_transp, X being the grid-point coordinate.
+! NOTE(review): sign and 1/(2|r-R|) factor of the formula above should be checked against the two input providers.
+ END_DOC
+ include 'constants.include.F'
+ integer :: ipt, idir, iorb, jorb
+ double precision :: cst
+ double precision :: tic, toc
+ cst = 0.5d0 * inv_sq_pi ! not used below
+ print*,'providing x_W_ij_erf_rk ...'
+ call wall_time(tic)
+ !$OMP PARALLEL                  &
+ !$OMP DEFAULT (NONE)            &
+ !$OMP PRIVATE (ipt,idir,iorb,jorb) & 
+ !$OMP SHARED (x_W_ij_erf_rk,n_points_final_grid,mo_x_v_ij_erf_rk_cst_mu_transp,mo_v_ij_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+ do iorb = 1, mo_num
+  do jorb = 1, mo_num
+   do idir = 1, 3
+    do ipt = 1, n_points_final_grid
+     x_W_ij_erf_rk(ipt,idir,jorb,iorb)  =  mo_x_v_ij_erf_rk_cst_mu_transp(ipt,idir,jorb,iorb) - final_grid_points(idir,ipt) * mo_v_ij_erf_rk_cst_mu_transp(ipt,jorb,iorb)
+    enddo
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ ! The two transposed inputs are released once the result has been assembled
+ FREE mo_v_ij_erf_rk_cst_mu_transp 
+ FREE mo_x_v_ij_erf_rk_cst_mu_transp
+ call wall_time(toc)
+ print*,'time to provide x_W_ij_erf_rk = ',toc - tic
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, sqrt_weight_at_r, (n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+! Square root of final_weight_at_r_vector at every point of the final grid
+ END_DOC
+ sqrt_weight_at_r(1:n_points_final_grid) = dsqrt(final_weight_at_r_vector(1:n_points_final_grid))
+END_PROVIDER 
+
+!BEGIN_PROVIDER [ double precision, mos_in_r_array_transp_sq_weight, (n_points_final_grid,mo_num)]
+
+
+!BEGIN_PROVIDER [ double precision, gauss_ij_rk_transp, (ao_num, ao_num, n_points_final_grid) ]
+! implicit none
+! integer :: i,j,ipoint
+! do ipoint = 1, n_points_final_grid
+!  do j = 1, ao_num
+!   do i = 1, ao_num
+!    gauss_ij_rk_transp(i,j,ipoint) = gauss_ij_rk(ipoint,i,j)
+!   enddo
+!  enddo
+! enddo
+!END_PROVIDER 
+!
+!
+!BEGIN_PROVIDER [ double precision, mo_gauss_ij_rk, ( mo_num, mo_num,n_points_final_grid)]
+! implicit none
+! integer :: ipoint
+! !$OMP PARALLEL                  &
+! !$OMP DEFAULT (NONE)            &
+! !$OMP PRIVATE (ipoint) & 
+! !$OMP SHARED (n_points_final_grid,gauss_ij_rk_transp,mo_gauss_ij_rk)
+! !$OMP DO SCHEDULE (dynamic)
+! do ipoint = 1, n_points_final_grid
+!  call ao_to_mo(gauss_ij_rk_transp(1,1,ipoint),size(gauss_ij_rk_transp,1),mo_gauss_ij_rk(1,1,ipoint),size(mo_gauss_ij_rk,1))
+! enddo
+! !$OMP END DO
+! !$OMP END PARALLEL
+!
+!END_PROVIDER 
+!
+!BEGIN_PROVIDER [ double precision, mo_gauss_ij_rk_transp, (n_points_final_grid, mo_num, mo_num)]
+! implicit none
+! integer :: i,j,ipoint
+! do ipoint = 1, n_points_final_grid
+!  do j = 1, mo_num
+!   do i = 1, mo_num
+!    mo_gauss_ij_rk_transp(ipoint,i,j) = mo_gauss_ij_rk(i,j,ipoint)
+!   enddo
+!  enddo
+! enddo
+!
+!END_PROVIDER 
+!
diff --git a/src/three_body_ints/three_body_tensor.irp.f b/src/three_body_ints/three_body_tensor.irp.f
new file mode 100644
index 00000000..2b65a925
--- /dev/null
+++ b/src/three_body_ints/three_body_tensor.irp.f
@@ -0,0 +1,106 @@
+BEGIN_PROVIDER [ double precision, three_body_ints, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Matrix elements of the -L three-body operator (note the -1 factor: three_body_ints can be used directly in Slater rules).
+! NOTE(review): both nested tests use read_three_body_ints, so the read_array_6_index_tensor branch can never run -- confirm the intended flag.
+! Otherwise computed with give_integrals_3_body for i>=k, j>=l, m>=n and copied to the index-swapped positions.
+ END_DOC
+ integer :: i,j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ three_body_ints = 0.d0
+ print*,'Providing the three_body_ints ...'
+ call wall_time(wall0)
+ name_file = 'six_index_tensor'
+ if(read_three_body_ints)then
+  call read_fcidump_3_tc(three_body_ints)
+ else
+  if(read_three_body_ints)then
+   print*,'Reading three_body_ints from disk ...'
+   call read_array_6_index_tensor(mo_num,three_body_ints,name_file)
+  else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (i,j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_ints)
+  !$OMP DO SCHEDULE (dynamic)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+      do m = n, mo_num
+       do j = l, mo_num
+        do i = k, mo_num
+!!         if(i>=j)then
+           integral = 0.d0
+           call give_integrals_3_body(i,j,m,k,l,n,integral)
+ 
+           three_body_ints(i,j,m,k,l,n) = -1.d0 * integral 
+   
+           ! swap i <-> k
+           three_body_ints(k,j,m,i,l,n) = -1.d0 * integral ! i,k
+           ! swap i <-> k together with one other pair (j,l or m,n)
+           three_body_ints(k,l,m,i,j,n) = -1.d0 * integral 
+           three_body_ints(k,j,n,i,l,m) = -1.d0 * integral 
+           ! all three pairs swapped
+           three_body_ints(k,l,n,i,j,m) = -1.d0 * integral 
+   
+           ! swap j <-> l
+           three_body_ints(i,l,m,k,j,n) = -1.d0 * integral ! j,l
+           ! swap j <-> l together with one other pair (first line repeats an assignment made above)
+           three_body_ints(k,l,m,i,j,n) = -1.d0 * integral 
+           three_body_ints(i,l,n,k,j,m) = -1.d0 * integral 
+           ! all three pairs swapped: already assigned above, kept commented out
+!!!!        three_body_ints(k,l,n,i,j,m) = -1.d0 * integral 
+   
+           ! swap m <-> n
+           three_body_ints(i,j,n,k,l,m) = -1.d0 * integral ! m,n
+           ! swap m <-> n together with one other pair (both lines repeat assignments made above)
+           three_body_ints(k,j,n,i,l,m) = -1.d0 * integral ! m,n
+           three_body_ints(i,l,n,k,j,m) = -1.d0 * integral ! m,n
+           ! all three pairs swapped: already assigned above, kept commented out
+!!!!        three_body_ints(k,l,n,i,j,m) = -1.d0 * integral ! m,n
+ 
+!!         endif
+        enddo
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  endif
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_ints',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_ints on disk ...'
+  call write_array_6_index_tensor(mo_num,three_body_ints,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+
+
+subroutine give_integrals_3_body(i,j,m,k,l,n,integral)
+ implicit none
+ BEGIN_DOC
+! <ijm|L|kln> : sum over the 3 components and the final grid of weight * (orbital pair at r) * (product of two x_W_ij_erf_rk), for the three pair choices
+ END_DOC
+ integer, intent(in) :: i,j,m,k,l,n
+ double precision, intent(out) :: integral
+ integer :: ipt, idir
+ double precision :: wgt
+ integral = 0.d0
+ do idir = 1, 3
+  do ipt = 1, n_points_final_grid
+   wgt = final_weight_at_r_vector(ipt)
+   integral += wgt * mos_in_r_array_transp(ipt,i) * mos_in_r_array_transp(ipt,k) * x_W_ij_erf_rk(ipt,idir,m,n) * x_W_ij_erf_rk(ipt,idir,j,l) 
+   integral += wgt * mos_in_r_array_transp(ipt,j) * mos_in_r_array_transp(ipt,l) * x_W_ij_erf_rk(ipt,idir,m,n) * x_W_ij_erf_rk(ipt,idir,i,k) 
+   integral += wgt * mos_in_r_array_transp(ipt,m) * mos_in_r_array_transp(ipt,n) * x_W_ij_erf_rk(ipt,idir,j,l) * x_W_ij_erf_rk(ipt,idir,i,k) 
+  enddo
+ enddo
+end
+
diff --git a/src/three_body_ints/three_e_3_idx.irp.f b/src/three_body_ints/three_e_3_idx.irp.f
new file mode 100644
index 00000000..13210f00
--- /dev/null
+++ b/src/three_body_ints/three_e_3_idx.irp.f
@@ -0,0 +1,338 @@
+
+BEGIN_PROVIDER [ double precision, three_body_3_index, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Direct three-index elements of the -L three-body operator
+!
+! three_body_3_index(k,l,n) = < phi_k phi_l phi_n | phi_k phi_l phi_n >
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, morb
+ ! name_file is the file name handed to the read/write helpers
+ print*,'Providing the three_body_3_index ...'
+ name_file = 'three_body_3_index'
+ call wall_time(tic)
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  three_body_3_index = 0.d0
+  !$OMP PARALLEL                           &
+  !$OMP DEFAULT (NONE)                     &
+  !$OMP PRIVATE (iorb,jorb,morb,integral)  &
+  !$OMP SHARED (three_body_3_index,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do morb = 1, mo_num
+    do jorb = 1, mo_num
+     do iorb = 1, mo_num
+      integral = 0.d0
+      ! bra slots 1 2 3 and ket slots 1 2 3 carry the same orbitals
+      call give_integrals_3_body(iorb,jorb,morb,iorb,jorb,morb,integral)
+      three_body_3_index(iorb,jorb,morb) = -1.d0 * integral
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_3_index',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_3_index_exch_12, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Three-index EXCHANGE elements (slots 1 and 2 swapped in the ket) of the -L three-body operator
+!
+! three_body_3_index_exch_12(k,l,n) = < phi_k phi_l phi_n | phi_l phi_k phi_n >
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, morb
+ name_file = 'three_body_3_index_exch_12'
+ print*,'Providing the three_body_3_index_exch_12 ...'
+ call wall_time(tic)
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  three_body_3_index_exch_12 = 0.d0
+  !$OMP PARALLEL                                   &
+  !$OMP DEFAULT (NONE)                             &
+  !$OMP PRIVATE (iorb,jorb,morb,integral)          &
+  !$OMP SHARED (three_body_3_index_exch_12,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do morb = 1, mo_num
+    do jorb = 1, mo_num
+     do iorb = 1, mo_num
+      integral = 0.d0
+      ! ket = bra with slots 1 and 2 exchanged
+      call give_integrals_3_body(iorb,jorb,morb,jorb,iorb,morb,integral)
+      three_body_3_index_exch_12(iorb,jorb,morb) = -1.d0 * integral
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index_exch_12,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_3_index_exch_12',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index_exch_12 on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index_exch_12,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_3_index_exch_23, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 3 index matrix EXCHANGE element (slots 2 and 3 swapped in the ket) of the -L  three-body operator 
+!
+! three_body_3_index_exch_23(k,l,n) = < phi_k phi_l phi_n | phi_k phi_n phi_l >
+!
+! notice the -1 sign: in this way three_body_3_index_exch_23 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: i,j,m
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ print*,'Providing the three_body_3_index_exch_23 ...'
+ call wall_time(wall0)
+ name_file = 'three_body_3_index_exch_23'
+ if(read_three_body_ints)then
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index_exch_23,name_file)
+ else
+  provide x_W_ij_erf_rk
+  three_body_3_index_exch_23 = 0.d0
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (i,j,m,integral) & 
+  !$OMP SHARED (mo_num,three_body_3_index_exch_23)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do m = 1, mo_num ! 3
+    do j = 1, mo_num ! 2 
+     do i = 1, mo_num ! 1 
+      integral = 0.d0
+      !                          1 2 3 1 2 3
+      call give_integrals_3_body(i,j,m,i,m,j,integral)
+ 
+      three_body_3_index_exch_23(i,j,m) = -1.d0 * integral 
+   
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1) ! taken after the endif so wall1 is also defined when the tensor was read from disk
+ print*,'wall time for three_body_3_index_exch_23',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index_exch_23 on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index_exch_23,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_3_index_exch_13, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Three-index EXCHANGE elements (slots 1 and 3 swapped in the ket) of the -L three-body operator
+!
+! three_body_3_index_exch_13(k,l,n) = < phi_k phi_l phi_n | phi_n phi_l phi_k >
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, morb
+ print*,'Providing the three_body_3_index_exch_13 ...'
+ call wall_time(tic)
+ name_file = 'three_body_3_index_exch_13'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  three_body_3_index_exch_13 = 0.d0
+  !$OMP PARALLEL                                   &
+  !$OMP DEFAULT (NONE)                             &
+  !$OMP PRIVATE (iorb,jorb,morb,integral)          &
+  !$OMP SHARED (three_body_3_index_exch_13,mo_num)
+  !$OMP DO SCHEDULE (guided)
+   do morb = 1, mo_num
+    do jorb = 1, mo_num
+     do iorb = 1, mo_num
+      integral = 0.d0
+      ! ket = bra with slots 1 and 3 exchanged
+      call give_integrals_3_body(iorb,jorb,morb,morb,jorb,iorb,integral)
+      three_body_3_index_exch_13(iorb,jorb,morb) = -1.d0 * integral
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index_exch_13,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_3_index_exch_13',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index_exch_13 on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index_exch_13,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, three_body_3_index_exch_231, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Three-index elements of the -L three-body operator with the ket orbitals cyclically permuted (2 3 1)
+!
+! three_body_3_index_exch_231(k,l,n) = < phi_k phi_l phi_n | phi_l phi_n phi_k >
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, morb
+ ! name_file is the file name handed to the read/write helpers
+ print*,'Providing the three_body_3_index_231 ...'
+ call wall_time(tic)
+ name_file = 'three_body_3_index_exch_231'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  three_body_3_index_exch_231 = 0.d0
+  !$OMP PARALLEL                                    &
+  !$OMP DEFAULT (NONE)                              &
+  !$OMP PRIVATE (iorb,jorb,morb,integral)           &
+  !$OMP SHARED (three_body_3_index_exch_231,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do morb = 1, mo_num
+    do jorb = 1, mo_num
+     do iorb = 1, mo_num
+      integral = 0.d0
+      ! ket slots 1 2 3 receive bra orbitals 2 3 1
+      call give_integrals_3_body(iorb,jorb,morb,jorb,morb,iorb,integral)
+      three_body_3_index_exch_231(iorb,jorb,morb) = -1.d0 * integral
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index_exch_231,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_3_index_exch_231 ',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index_exch_231 on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index_exch_231,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_3_index_exch_312, (mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Three-index elements of the -L three-body operator with the ket orbitals cyclically permuted (3 1 2)
+!
+! three_body_3_index_exch_312(k,l,n) = < phi_k phi_l phi_n | phi_n phi_k phi_l >
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, morb
+ print*,'Providing the three_body_3_index_312 ...'
+ call wall_time(tic)
+ name_file = 'three_body_3_index_exch_312'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  three_body_3_index_exch_312 = 0.d0
+  !$OMP PARALLEL                                    &
+  !$OMP DEFAULT (NONE)                              &
+  !$OMP PRIVATE (iorb,jorb,morb,integral)           &
+  !$OMP SHARED (three_body_3_index_exch_312,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(3)
+   do morb = 1, mo_num
+    do jorb = 1, mo_num
+     do iorb = 1, mo_num
+      integral = 0.d0
+      ! ket slots 1 2 3 receive bra orbitals 3 1 2
+      call give_integrals_3_body(iorb,jorb,morb,morb,iorb,jorb,integral)
+      three_body_3_index_exch_312(iorb,jorb,morb) = -1.d0 * integral
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_ints from disk ...'
+  call read_array_3_index_tensor(mo_num,three_body_3_index_exch_312,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_3_index_312',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_3_index_exch_312 on disk ...' 
+  call write_array_3_index_tensor(mo_num,three_body_3_index_exch_312,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+subroutine write_array_3_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ ! Writes array_tmp (no format specifier) to trim(ezfio_filename)//'/work/'//trim(name_file)
+ integer, intent(in)          :: n_orb
+ character*(128), intent(in)  :: name_file
+ double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb)
+ integer                      :: iunit, getUnitAndOpen
+ character*(128)              :: full_path
+ PROVIDE ezfio_filename
+ full_path = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(full_path,'W')
+ write(iunit) array_tmp
+ close(unit=iunit)
+end
+
+subroutine read_array_3_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ integer, intent(in)           :: n_orb      ! extent of each of the three dimensions
+ character*(128), intent(in)   :: name_file  ! file name, appended to ezfio_filename/work/
+ double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb) ! filled from the file
+ integer                       :: iunit, getUnitAndOpen
+ character*(128)               :: full_path
+ PROVIDE ezfio_filename
+ full_path = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(full_path,'R')
+ read(iunit) array_tmp
+ close(unit=iunit)
+end
diff --git a/src/three_body_ints/three_e_4_idx.irp.f b/src/three_body_ints/three_e_4_idx.irp.f
new file mode 100644
index 00000000..0c6743f0
--- /dev/null
+++ b/src/three_body_ints/three_e_4_idx.irp.f
@@ -0,0 +1,347 @@
+
+BEGIN_PROVIDER [ double precision, three_body_4_index, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Four-index direct elements of the -L three-body operator
+!
+! three_body_4_index(j,m,k,i) = < phi_i phi_j phi_m | phi_k phi_j phi_m >  (slot order of give_integrals_3_body)
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, korb, morb
+ three_body_4_index = 0.d0
+ print*,'Providing the three_body_4_index ...'
+ call wall_time(tic)
+ name_file = 'three_body_4_index'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  !$OMP PARALLEL                                &
+  !$OMP DEFAULT (NONE)                          &
+  !$OMP PRIVATE (iorb,jorb,morb,korb,integral)  &
+  !$OMP SHARED (three_body_4_index,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do iorb = 1, mo_num
+    do korb = 1, mo_num
+     do morb = 1, mo_num
+      do jorb = 1, mo_num
+       integral = 0.d0
+       ! slots 2 and 3 are identical in bra and ket; slot 1 goes from iorb to korb
+       call give_integrals_3_body(iorb,jorb,morb,korb,jorb,morb,integral)
+       three_body_4_index(jorb,morb,korb,iorb) = -1.d0 * integral
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_4_index from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_4_index',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_4_index_exch_12, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Four-index EXCHANGE elements of the -L three-body operator
+!
+! three_body_4_index_exch_12(j,m,k,i) = < phi_i phi_m phi_j | phi_k phi_j phi_m >  (slot order of give_integrals_3_body)
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, korb, morb
+ ! The array is zeroed before either the read or the computation
+ three_body_4_index_exch_12 = 0.d0
+ print*,'Providing the three_body_4_index_exch_12 ...'
+ call wall_time(tic)
+ name_file = 'three_body_4_index_exch_12'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  !$OMP PARALLEL                                   &
+  !$OMP DEFAULT (NONE)                             &
+  !$OMP PRIVATE (iorb,jorb,morb,korb,integral)     &
+  !$OMP SHARED (three_body_4_index_exch_12,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(4)
+   do iorb = 1, mo_num
+    do korb = 1, mo_num
+     do morb = 1, mo_num
+      do jorb = 1, mo_num
+       integral = 0.d0
+       ! bra slots 2 and 3 hold (morb,jorb), ket slots 2 and 3 hold (jorb,morb)
+       call give_integrals_3_body(iorb,morb,jorb,korb,jorb,morb,integral)
+       three_body_4_index_exch_12(jorb,morb,korb,iorb) = -1.d0 * integral
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_4_index_exch_12 from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index_exch_12,name_file)
+ endif
+ ! The timing covers whichever branch ran
+ call wall_time(toc)
+ print*,'wall time for three_body_4_index_exch_12',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index_exch_12 on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index_exch_12,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_4_index_exch_12_part, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 4 index matrix EXCHANGE element of the -L  three-body operator 
+!
+! three_body_4_index_exch_12_part(j,m,k,i) = < phi_m phi_j phi_i | phi_m phi_k phi_j >
+!
+! notice the -1 sign: in this way three_body_4_index_exch_12_part can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: i,j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ three_body_4_index_exch_12_part = 0.d0
+ print*,'Providing the three_body_4_index_exch_12_part ...'
+ call wall_time(wall0)
+
+ name_file = 'three_body_4_index_exch_12_part'
+ if(read_three_body_ints)then
+  print*,'Reading three_body_4_index_exch_12_part from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index_exch_12_part,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (i,j,m,k,integral) & 
+  !$OMP SHARED (mo_num,three_body_4_index_exch_12_part)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do i = 1, mo_num
+    do k = 1, mo_num
+     do m = 1, mo_num
+      do j = 1, mo_num
+       integral = 0.d0
+       ! give_integrals_3_body slot order: bra (i,j,m), ket (j,k,m)
+       call give_integrals_3_body(i,j,m,j,k,m,integral)
+       three_body_4_index_exch_12_part(j,m,k,i) = -1.d0 * integral 
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1) ! taken after the endif so wall1 is also defined when the tensor was read from disk
+ print*,'wall time for three_body_4_index_exch_12_part',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index_exch_12_part on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index_exch_12_part,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_4_index_exch_12_part_bis, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 4 index matrix EXCHANGE element of the -L  three-body operator 
+!
+! three_body_4_index_exch_12_part_bis(j,m,k,i) = < phi_i phi_j phi_m | phi_m phi_j phi_k >  (slot order of give_integrals_3_body)
+!
+! notice the -1 sign: in this way three_body_4_index_exch_12_part_bis can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: i,j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ three_body_4_index_exch_12_part_bis = 0.d0
+ print*,'Providing the three_body_4_index_exch_12_part_bis ...'
+ call wall_time(wall0)
+
+ name_file = 'three_body_4_index_exch_12_part_bis'
+ if(read_three_body_ints)then
+  print*,'Reading three_body_4_index_exch_12_part_bis from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index_exch_12_part_bis,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (i,j,m,k,integral) & 
+  !$OMP SHARED (mo_num,three_body_4_index_exch_12_part_bis)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do i = 1, mo_num
+    do k = 1, mo_num
+     do m = 1, mo_num
+      do j = 1, mo_num
+       integral = 0.d0
+       ! give_integrals_3_body slot order: bra (i,j,m), ket (m,j,k)
+       call give_integrals_3_body(i,j,m,m,j,k,integral)
+ 
+       three_body_4_index_exch_12_part_bis(j,m,k,i) = -1.d0 * integral 
+   
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_4_index_exch_12_part_bis',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index_exch_12_part_bis on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index_exch_12_part_bis,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, three_body_4_index_exch_231, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Four-index elements of the -L three-body operator with the ket orbitals cyclically permuted (2 3 1)
+!
+! three_body_4_index_exch_231(j,m,k,i) = < phi_i phi_j phi_m | phi_j phi_m phi_k >  (slot order of give_integrals_3_body)
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, korb, morb
+ three_body_4_index_exch_231 = 0.d0
+ print*,'Providing the three_body_4_index_exch_231 ...'
+ call wall_time(tic)
+ name_file = 'three_body_4_index_exch_231'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  !$OMP PARALLEL                                    &
+  !$OMP DEFAULT (NONE)                              &
+  !$OMP PRIVATE (iorb,jorb,morb,korb,integral)      &
+  !$OMP SHARED (three_body_4_index_exch_231,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do iorb = 1, mo_num
+    do korb = 1, mo_num
+     do morb = 1, mo_num
+      do jorb = 1, mo_num
+       integral = 0.d0
+       ! bra (iorb,jorb,morb), ket (jorb,morb,korb)
+       call give_integrals_3_body(iorb,jorb,morb,jorb,morb,korb,integral)
+       three_body_4_index_exch_231(jorb,morb,korb,iorb) = -1.d0 * integral
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_4_index_exch_231 from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index_exch_231,name_file)
+ endif
+ call wall_time(toc)
+ print*,'wall time for three_body_4_index_exch_231',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index_exch_231 on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index_exch_231,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_4_index_exch_312, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! Four-index elements of the -L three-body operator with the ket orbitals cyclically permuted (3 1 2)
+!
+! three_body_4_index_exch_312(j,m,k,i) = < phi_i phi_j phi_m | phi_m phi_k phi_j >  (slot order of give_integrals_3_body)
+!
+! The stored value is -1 times the result of give_integrals_3_body, so it can be used as is in Slater rules.
+ END_DOC
+ character*(128) :: name_file
+ double precision :: integral, tic, toc
+ integer :: iorb, jorb, korb, morb
+ three_body_4_index_exch_312 = 0.d0
+ print*,'Providing the three_body_4_index_exch_312 ...'
+ call wall_time(tic)
+ name_file = 'three_body_4_index_exch_312'
+ if(.not.read_three_body_ints)then
+  ! No read requested: evaluate every element
+  PROVIDE x_W_ij_erf_rk
+  !$OMP PARALLEL                                    &
+  !$OMP DEFAULT (NONE)                              &
+  !$OMP PRIVATE (iorb,jorb,morb,korb,integral)      &
+  !$OMP SHARED (three_body_4_index_exch_312,mo_num)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do iorb = 1, mo_num
+    do korb = 1, mo_num
+     do morb = 1, mo_num
+      do jorb = 1, mo_num
+       integral = 0.d0
+       ! bra (iorb,jorb,morb), ket (morb,korb,jorb)
+       call give_integrals_3_body(iorb,jorb,morb,morb,korb,jorb,integral)
+       three_body_4_index_exch_312(jorb,morb,korb,iorb) = -1.d0 * integral
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ else
+  print*,'Reading three_body_4_index_exch_312 from disk ...'
+  call read_array_4_index_tensor(mo_num,three_body_4_index_exch_312,name_file)
+ endif
+ call wall_time(toc)
+ print*,'wall time for three_body_4_index_exch_312',toc - tic
+ ! Optional dump to disk; the EZFIO io flag is then set to "Read"
+ if(write_three_body_ints)then
+  print*,'Writing three_body_4_index_exch_312 on disk ...' 
+  call write_array_4_index_tensor(mo_num,three_body_4_index_exch_312,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+END_PROVIDER 
+
+subroutine write_array_4_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ ! Writes array_tmp (no format specifier) to trim(ezfio_filename)//'/work/'//trim(name_file)
+ integer, intent(in)          :: n_orb
+ character*(128), intent(in)  :: name_file
+ double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb)
+ integer                      :: iunit, getUnitAndOpen
+ character*(128)              :: full_path
+ PROVIDE ezfio_filename
+ full_path = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(full_path,'W')
+ write(iunit) array_tmp
+ close(unit=iunit)
+end
+
+subroutine read_array_4_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ integer, intent(in)           :: n_orb      ! extent of each of the four dimensions
+ character*(128), intent(in)   :: name_file  ! file name, appended to ezfio_filename/work/
+ double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb) ! filled from the file
+ integer                       :: iunit, getUnitAndOpen
+ character*(128)               :: full_path
+ PROVIDE ezfio_filename
+ full_path = trim(ezfio_filename)//'/work/'//trim(name_file)
+ iunit = getUnitAndOpen(full_path,'R')
+ read(iunit) array_tmp
+ close(unit=iunit)
+end
diff --git a/src/three_body_ints/three_e_5_idx.irp.f b/src/three_body_ints/three_e_5_idx.irp.f
new file mode 100644
index 00000000..914601ff
--- /dev/null
+++ b/src/three_body_ints/three_e_5_idx.irp.f
@@ -0,0 +1,453 @@
+
+BEGIN_PROVIDER [ double precision, three_body_5_index, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index(k,j,m,l,n) = < phi_j phi_m phi_k | phi_l phi_n phi_k >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ three_body_5_index(1:mo_num, 1:mo_num, 1:mo_num, 1:mo_num, 1:mo_num) = 0.d0
+ print*,'Providing the three_body_5_index ...'
+ name_file = 'three_body_5_index'
+ call wall_time(wall0)
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index,name_file)
+ else
+  ! requested once, before entering the OpenMP region
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+         !                         direct term <j m k|l n k>
+         call give_integrals_3_body(j,m,k,l,n,k,integral)
+ 
+         three_body_5_index(k,j,m,l,n) = -1.d0 * integral 
+         ! (the -1 factor is the sign announced in the doc)
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = 1, n-1
+!     do j = 1, l-1
+!      three_body_5_index(k,j,m,l,n) = three_body_5_index(k,l,n,j,m)
+!      three_body_5_index(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_5_index_exch_13, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index_exch_13(k,j,m,l,n) = < phi_j phi_m phi_k | phi_k phi_n phi_l >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index_exch_13 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ ! the tensor is zeroed, then either read from disk or computed
+ three_body_5_index_exch_13 = 0.d0
+ ! name_file is the file name handed to read/write_array_5_index_tensor
+ name_file = 'three_body_5_index_exch_13'
+ print*,'Providing the three_body_5_index_exch_13 ...'
+ call wall_time(wall0)
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index_exch_13 from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index_exch_13,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index_exch_13)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+!!                                  j,m,k,l,n,k : direct (case 2)
+         call give_integrals_3_body(j,m,k,k,n,l,integral)
+!!                                  j,m,k,k,n,l : exchange 1 3
+!!       the integral is stored with a -1 factor (see the doc)
+         three_body_5_index_exch_13(k,j,m,l,n) = -1.d0 * integral 
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index_exch_13',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index_exch_13 on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index_exch_13,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = n, mo_num
+!     do j = l, mo_num
+!      three_body_5_index_exch_13(k,l,n,j,m) = three_body_5_index_exch_13(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_5_index_exch_32, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index_exch_32(k,j,m,l,n) = < phi_j phi_m phi_k | phi_l phi_k phi_n >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index_exch_32 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ ! name_file has the length declared by read/write_array_5_index_tensor
+ three_body_5_index_exch_32 = 0.d0
+ name_file = 'three_body_5_index_exch_32'
+ print*,'Providing the three_body_5_index_exch_32 ...'
+ call wall_time(wall0)
+
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index_exch_32 from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index_exch_32,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index_exch_32)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+!!                                  j,m,k,l,n,k : direct (case 3)
+         call give_integrals_3_body(j,m,k,l,k,n,integral)
+!!                                  j,m,k,l,k,n : exchange 2 3
+!!       the integral is stored with a -1 factor (see the doc)
+         three_body_5_index_exch_32(k,j,m,l,n) = -1.d0 * integral 
+   
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index_exch_32',wall1 - wall0
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index_exch_32 on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index_exch_32,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = n, mo_num
+!     do j = l, mo_num
+!      three_body_5_index_exch_32(k,l,n,j,m) = three_body_5_index_exch_32(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_5_index_exch_12, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index_exch_12(k,j,m,l,n) = < phi_j phi_m phi_k | phi_n phi_l phi_k >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index_exch_12 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ ! name_file has the length declared by read/write_array_5_index_tensor
+ three_body_5_index_exch_12 = 0.d0
+ name_file = 'three_body_5_index_exch_12'
+ print*,'Providing the three_body_5_index_exch_12 ...'
+ call wall_time(wall0)
+
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index_exch_12 from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index_exch_12,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index_exch_12)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+!!                                  j,m,k,l,n,k : direct (case 1)
+         call give_integrals_3_body(j,m,k,n,l,k,integral)
+!!                                  j,m,k,n,l,k : exchange 1 2
+!!       the integral is stored with a -1 factor (see the doc)
+         three_body_5_index_exch_12(k,j,m,l,n) = -1.d0 * integral 
+   
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index_exch_12',wall1 - wall0
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = n, mo_num
+!     do j = l, mo_num
+!      three_body_5_index_exch_12(k,l,n,j,m) = three_body_5_index_exch_12(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index_exch_12 on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index_exch_12,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, three_body_5_index_312, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index_312(k,j,m,l,n) = < phi_j phi_m phi_k | phi_n phi_k phi_l >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index_312 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ ! the tensor is zeroed, then either read from disk or computed
+ three_body_5_index_312 = 0.d0
+ name_file = 'three_body_5_index_312'
+ print*,'Providing the three_body_5_index_312 ...'
+ call wall_time(wall0)
+
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index_312 from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index_312,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index_312)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+         ! ket indices of the direct term (l n k) are replaced by (n k l):
+         !                         <j m k|l n k> - > <j m k|n k l>
+         call give_integrals_3_body(j,m,k,n,k,l,integral)
+         ! the integral is stored with a -1 factor (see the doc)
+         three_body_5_index_312(k,j,m,l,n) = -1.d0 * integral 
+   
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index_312',wall1 - wall0
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = n, mo_num
+!     do j = l, mo_num
+!      three_body_5_index_312(k,l,n,j,m) = three_body_5_index_312(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index_312 on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index_312,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_body_5_index_132, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! 5 index matrix element of the -L  three-body operator 
+!
+! three_body_5_index_132(k,j,m,l,n) = < phi_j phi_m phi_k | phi_k phi_l phi_n >
+! (read from / written to disk according to read_three_body_ints / write_three_body_ints)
+! notice the -1 sign: in this way three_body_5_index_132 can be directly used to compute Slater rules :)
+ END_DOC
+ integer :: j,k,l,m,n
+ double precision :: integral, wall1, wall0
+ character*(128) :: name_file 
+ three_body_5_index_132 = 0.d0
+ name_file = 'three_body_5_index_132'
+ print*,'Providing the three_body_5_index_132 ...'
+ call wall_time(wall0)
+
+ if(read_three_body_ints)then
+  print*,'Reading three_body_5_index_132 from disk ...'
+  call read_array_5_index_tensor(mo_num,three_body_5_index_132,name_file)
+ else
+  provide x_W_ij_erf_rk
+  !$OMP PARALLEL                  &
+  !$OMP DEFAULT (NONE)            &
+  !$OMP PRIVATE (j,k,l,m,n,integral) & 
+  !$OMP SHARED (mo_num,three_body_5_index_132)
+  !$OMP DO SCHEDULE (guided) COLLAPSE(2)
+   do n = 1, mo_num
+    do l = 1, mo_num
+     do k = 1, mo_num
+!      do m = n, mo_num
+!       do j = l, mo_num
+      do m = 1, mo_num
+       do j = 1, mo_num
+         integral = 0.d0
+         ! ket indices of the direct term (l n k) are replaced by (k l n):
+         !                         <j m k|l n k> - > <j m k|k l n>
+         call give_integrals_3_body(j,m,k,k,l,n,integral)
+         ! the integral is stored with a -1 factor (see the doc)
+         three_body_5_index_132(k,j,m,l,n) = -1.d0 * integral 
+   
+       enddo
+      enddo
+     enddo
+    enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+ endif
+ call wall_time(wall1)
+ print*,'wall time for three_body_5_index_132',wall1 - wall0
+! do n = 1, mo_num
+!  do l = 1, mo_num
+!   do k = 1, mo_num
+!    do m = n, mo_num
+!     do j = l, mo_num
+!      three_body_5_index_132(k,l,n,j,m) = three_body_5_index_132(k,j,m,l,n)
+!     enddo
+!    enddo
+!   enddo
+!  enddo
+! enddo
+ if(write_three_body_ints)then
+  print*,'Writing three_body_5_index_132 on disk ...' 
+  call write_array_5_index_tensor(mo_num,three_body_5_index_132,name_file)
+  call ezfio_set_three_body_ints_io_three_body_ints("Read")
+ endif
+
+END_PROVIDER 
+
+subroutine write_array_5_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ ! Writes array_tmp with one unformatted write into <ezfio_filename>/work/<name_file>
+ integer,          intent(in) :: n_orb
+ character*(128),  intent(in) :: name_file 
+ double precision, intent(in) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb)
+ integer                      :: iunit, getUnitAndOpen
+ character*(128)              :: output
+ PROVIDE ezfio_filename
+ output = trim(ezfio_filename) // '/work/' // trim(name_file)
+ iunit  = getUnitAndOpen(output,'W')
+ write(iunit) array_tmp
+ close(iunit)
+end
+
+subroutine read_array_5_index_tensor(n_orb,array_tmp,name_file)
+ implicit none
+ integer,          intent(in)  :: n_orb
+ character*(128),  intent(in)  :: name_file 
+ double precision, intent(out) :: array_tmp(n_orb,n_orb,n_orb,n_orb,n_orb) ! read from <ezfio_filename>/work/<name_file>
+ integer                       :: iunit, getUnitAndOpen
+ character*(128)               :: output
+ PROVIDE ezfio_filename
+ output = trim(ezfio_filename) // '/work/' // trim(name_file)
+ iunit  = getUnitAndOpen(output,'R')
+ read(iunit) array_tmp
+ close(iunit)
+end

From ca4cdf56d5e022b06d59f57097d2ab9cf29856f6 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 6 Feb 2023 19:03:22 +0100
Subject: [PATCH 4/7] added non_hermit_dav

---
 src/non_hermit_dav/NEED                       |    1 +
 src/non_hermit_dav/biorthog.irp.f             | 1156 +++++++
 src/non_hermit_dav/gram_schmit.irp.f          |   56 +
 src/non_hermit_dav/htilde_mat.irp.f           |   93 +
 .../lapack_diag_non_hermit.irp.f              | 2907 +++++++++++++++++
 src/non_hermit_dav/new_routines.irp.f         |  670 ++++
 src/non_hermit_dav/project.irp.f              |   53 +
 src/non_hermit_dav/utils.irp.f                |  325 ++
 8 files changed, 5261 insertions(+)
 create mode 100644 src/non_hermit_dav/NEED
 create mode 100644 src/non_hermit_dav/biorthog.irp.f
 create mode 100644 src/non_hermit_dav/gram_schmit.irp.f
 create mode 100644 src/non_hermit_dav/htilde_mat.irp.f
 create mode 100644 src/non_hermit_dav/lapack_diag_non_hermit.irp.f
 create mode 100644 src/non_hermit_dav/new_routines.irp.f
 create mode 100644 src/non_hermit_dav/project.irp.f
 create mode 100644 src/non_hermit_dav/utils.irp.f

diff --git a/src/non_hermit_dav/NEED b/src/non_hermit_dav/NEED
new file mode 100644
index 00000000..9487075c
--- /dev/null
+++ b/src/non_hermit_dav/NEED
@@ -0,0 +1 @@
+utils
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
new file mode 100644
index 00000000..78fddf54
--- /dev/null
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -0,0 +1,1156 @@
+subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvectors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! A (reordered by diagonal elements) is modified by split_matrix_degen with a growing shift until the n
+  ! eigenvalues are real (|Im| < 1.d-20) and check_bi_ortho returns accu_nd < 1.d-10, so n_real_eigv = n
+  ! on return. The program stops if the shift exceeds 1.d-3.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, iteration
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: accu_nd
+  integer,          allocatable :: list_good(:), iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:)
+  logical                       :: good_ortho
+
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  shift = 1.d-15
+  shift_current = shift
+  iteration = 1 
+  n_real_eigv = 0 ! intent(out): it must be defined before being tested by the loop below
+  good_ortho = .False.
+  do while(n_real_eigv.ne.n.or. .not.good_ortho)
+   if(shift.gt.1.d-3)then
+    print*,'shift > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   print*,'***** iteration = ',iteration
+   print*,'shift = ',shift
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A_save
+   do i = 1, n
+    do j = 1, n
+     if(dabs(Aw(j,i)).lt.shift)then
+      Aw(j,i) = 0.d0
+     endif
+    enddo
+   enddo
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   allocate(im_part(n),iorder(n))
+   do i = 1, n
+    im_part(i) = -dabs(WI(i))
+    iorder(i) = i
+   enddo
+   call dsort(im_part, iorder, n)
+   ! im_part(1) is used as the largest imaginary part (presumably dsort sorts in increasing order)
+   shift_current = max(10.d0 * dabs(im_part(1)),shift)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+   Aw = A_save
+   call split_matrix_degen(Aw,n,shift_current)
+   deallocate( im_part, iorder )
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   ! You track the real eigenvalues 
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec_tmp(j,i) = VR(j,list_good(iorder(i)))
+       leigvec_tmp(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+
+   if(n_real_eigv == n)then
+    allocate(S(n,n))
+    call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd)
+    print*,'accu_nd = ',accu_nd
+    ! the bi-orthogonality is accepted when accu_nd < 1.d-10
+    good_ortho = accu_nd .lt. 1.d-10
+    deallocate(S)
+   endif
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+   iteration += 1
+  enddo
+  do i = 1, n
+   do j = 1, n
+    reigvec(iorder_origin(j),i) = reigvec_tmp(j,i)
+    leigvec(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+end subroutine non_hrmt_diag_split_degen
+
+! ---
+
+subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvectors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! Random numbers of growing magnitude (shift = 1.d-10, 1.d-9, ...) are added to the diagonal of A
+  ! until all the eigenvalues are real (|Im| < 1.d-10) or until shift exceeds 1.d-3.
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: r,thr
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, allocatable :: Aw(:,:)
+  ! thr : threshold on |Im| below which an eigenvalue is counted as real
+
+
+  print*,'Computing the left/right eigenvectors ...'
+  n_real_eigv = 0 ! intent(out): it must be defined before being tested by the loop below
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  shift = 1.d-10
+  do while(n_real_eigv.ne.n .and. shift.le.1.d-3)
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   ! The perturbation has to dominate the imaginary parts found for the
+   ! unperturbed matrix: its magnitude is 10 x the largest |Im(eigenvalue)|,
+   ! and at least "shift". maxval is taken over the whole WI array, so no
+   ! sorting of the imaginary parts is needed.
+   !
+   shift_current = max(10.d0 * maxval(dabs(WI)),shift)
+   print*,'adding random number of magnitude ',shift_current
+   Aw = A
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     Aw(i,i) += shift_current * r
+   enddo
+   ! WR, WI, VL, VR are overwritten by the diagonalization of the perturbed matrix
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+ 
+   ! You track the real eigenvalues 
+   thr = 1.d-10
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+ 
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+ 
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec(j,i) = VR(j,list_good(iorder(i)))
+       leigvec(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+  enddo
+  if(n_real_eigv.ne.n)then
+   print*,'shift > 1.d-3 !!'
+   print*,'Your matrix intrinsically contains complex eigenvalues'
+  endif
+
+end subroutine non_hrmt_real_diag_new
+
+! ---
+
+subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the eigenvalues (REAL PART ONLY, sorted by dsort) and corresponding LEFT/RIGHT eigenvectors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! All n eigenpairs are kept, so n_real_eigv = n on return: eigenvalues whose imaginary part
+  ! exceeds Im_thresh_tcscf are only reported on the standard output.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(in)  :: thr_d, thr_nd
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  ! thr_d, thr_nd : tolerances compared below with accu_d, accu_nd (outputs of check_biorthog)
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr, thr_cut, thr_diag, thr_norm
+  double precision              :: accu_d, accu_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !                  diagonalization: eigenvalue(i) = WR(i) + i * WI(i)
+
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
+  
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
+
+  !thr_cut = 1.d-15
+  !call cancel_small_elmts(A, n, thr_cut)
+
+  !call lapack_diag_non_sym_right(n, A, WR, WI, VR)
+  call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+  !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
+  !print *, ' right eigenvect bef' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VR(:,i)
+  !enddo
+  !print *, ' left eigenvect bef'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VL(:,i)
+  !enddo
+
+  thr_diag = 1d-06
+  thr_norm = 1d+10
+  call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.)
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  n_good = 0
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
+  do i = 1, n
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    if(dabs(WI(i)) .lt. thr) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+
+  if(n_good.ne.n)then
+   print*,'there are some imaginary eigenvalues '
+   thr_diag = 1d-03
+   n_good = n
+  endif
+  allocate(list_good(n_good), iorder(n_good))
+  ! NOTE(review): thr_diag set just above is overwritten before its next use; every eigenvalue is kept below
+  n_good = 0
+  do i = 1, n
+    n_good += 1
+    list_good(n_good) = i
+    eigval(n_good) = WR(i)
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  ASSERT(n==n_real_eigv)
+
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') eigval(i)
+  !enddo
+  !print *, ' right eigenvect aft ord' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') reigvec(:,i)
+  !enddo
+  !print *, ' left eigenvect aft ord'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') leigvec(:,i)
+  !enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  thr_diag = 10.d0
+  thr_norm = 1d+10
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+  call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
+
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
+    deallocate(S)
+    return
+
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+  !  deallocate(S)
+  !  return
+
+  else
+
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+
+    ! ---
+
+!   call impose_orthog_degen_eigvec(n, eigval, reigvec)
+!   call impose_orthog_degen_eigvec(n, eigval, leigvec)
+
+    call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec)
+
+
+    !call impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, eigval, leigvec, reigvec)
+
+    !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec)
+
+    ! ---
+    ! NOTE(review): the test below uses the absolute deviation of accu_d, the first test above a relative one
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+    if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then
+      call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+    endif
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+
+    !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+
+    ! ---
+
+    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+    deallocate(S)
+
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig
+
+! ---
+
+subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvectors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  ! if some |Im| >= 1.d-12 is found, A is diagonalized again after adding random numbers on its diagonal
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+
+  ! -------------------------------------------------------------------------------------
+  !                  diagonalization: eigenvalue(i) = WR(i) + i * WI(i)
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+
+  Aw(:,:) = A(:,:)
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+  ! first pass: count the eigenvalues with |Im| < 1.d-12
+  thr    = 1.d-12
+  double precision, allocatable :: im_part(:)
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  if(n_good .lt. n)then
+   print*,'Removing degeneracies to remove imaginary parts'
+   allocate(im_part(n),iorder(n))
+   r = 0.d0
+   do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+   enddo
+   call dsort(im_part,iorder,n) 
+   thr = 10.d0 * dabs(im_part(1))
+   print*,'adding random numbers on the diagonal of magnitude ',thr
+   Aw(:,:) = A(:,:)
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     print*,'r = ',r*thr
+     Aw(i,i) += thr * r
+   enddo
+   print*,'Rediagonalizing the matrix with random numbers'
+   call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+   deallocate(im_part,iorder)
+  endif
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+  !                  (real here means |Im| < 1.d-5, looser than the 1.d-12 of the first pass)
+  n_good = 0
+  thr    = 1.d-5
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  allocate( list_good(n_good), iorder(n_good) )
+
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = leigvec^T x reigvec (restricted to the n_real_eigv first columns)
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+  ! accu_nd = sqrt of the sum of the squared off-diagonal elements of S
+  accu_nd = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    ! L x R is already bi-orthogonal
+
+    print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    print *, ' L & T bi-orthogonality: not imposed yet'
+    print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig_random_diag
+
+! ---
+
+subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! Diagonalizes with lapack_diag_non_sym the real non-Hermitian matrix A(n,n), after adding
+  ! a random number smaller than 10.d-10 to each of its diagonal elements.
+  !
+  ! ALL n eigenvalues are kept, whatever their imaginary part:
+  !
+  ! eigval(i)    = REAL part of the i-th eigenvalue, in the order produced by dsort
+  !
+  ! reigvec(:,i) = RIGHT vector returned by LAPACK for that eigenvalue
+  !
+  ! leigvec(:,i) = LEFT  vector returned by LAPACK for that eigenvalue
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n"
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: k, col
+  double precision              :: rnd, tol_im, accu_nd
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:), S(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+
+  ! -------------------------------------------------------------------------------------
+  !                  diagonalize a randomly shifted copy of A
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( Aw(n,n), WR(n), WI(n), VL(n,n), VR(n,n), iorder(n) )
+
+  Aw(:,:) = A(:,:)
+  do k = 1, n
+    ! one random number per diagonal element
+    call RANDOM_NUMBER(rnd)
+    Aw(k,k) = Aw(k,k) + 10.d-10 * rnd
+  enddo
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! -------------------------------------------------------------------------------------
+  !                  count the real eigenvalues & report the complex ones
+
+  tol_im = 1.d-15
+
+  ! every eigenvalue enters the sort, whatever its imaginary part
+  do k = 1, n
+    iorder(k) = k
+    eigval(k) = WR(k)
+  enddo
+
+  n_real_eigv = 0
+  k = 1
+  do while (k.le.n)
+    if( dabs(WI(k)).gt.tol_im ) then
+      ! a complex eigenvalue is handled together with its neighbour k+1: both are printed,
+      ! none of them is counted in n_real_eigv
+      print*, 'Found an imaginary component to eigenvalue on i = ', k
+      print*, 'Re(i) , Im(i)  ', WR(k), WI(k)
+      print*, 'Re(i+1),Im(i+1)',WR(k+1), WI(k+1)
+      k = k + 2
+    else
+      n_real_eigv = n_real_eigv + 1
+      k = k + 1
+    endif
+  enddo
+
+  ! -------------------------------------------------------------------------------------
+  !                  sort the real parts & reorder the eigenvectors accordingly
+
+  call dsort(eigval, iorder, n)
+
+  reigvec(:,:) = 0.d0
+  leigvec(:,:) = 0.d0
+  do col = 1, n
+    reigvec(1:n,col) = VR(1:n,iorder(col))
+    leigvec(1:n,col) = VL(1:n,iorder(col))
+  enddo
+
+  deallocate( iorder )
+  deallocate( Aw, WR, WI, VL, VR )
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  ! S = leigvec^T x reigvec and accu_nd = norm of its off-diagonal part:
+  ! both are evaluated here but neither is tested nor returned to the caller
+  allocate( S(n,n) )
+  call dgemm( 'T', 'N', n, n, n, 1.d0, leigvec, n, reigvec, n, 0.d0, S, n )
+
+  accu_nd = 0.d0
+  do col = 1, n
+    do k = 1, n
+      if(k.ne.col) accu_nd = accu_nd + S(k,col) * S(k,col)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+end subroutine non_hrmt_real_im
+
+! ---
+
+subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the EIGENVALUES sorted by their REAL part and the corresponding
+  ! LEFT/RIGHT eigenvectors for A R = lambda B R and A^\dagger L = lambda B^\dagger L
+  !
+  ! The pencil (A,B) is solved with LAPACK DGGEV : lambda = (WR + i WI) / beta
+  !
+  ! eigval(i) = WR / (beta + 1.d-10), the 1.d-10 keeps the quotient finite when beta = 0
+  !
+  ! ALL n eigenpairs are returned, the complex ones being only reported on the standard output
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n),B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: LWORK, INFO
+  double precision              :: thr
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:),Bw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:)
+  double precision, allocatable :: WORK(:)
+
+  ! -------------------------------------------------------------------------------------
+  !                  generalized eigenproblem
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n))
+
+  ! DGGEV overwrites both input matrices: it is given copies
+  Aw(:,:) = A(:,:)
+  Bw(:,:) = B(:,:)
+
+  ! DGGEV is called directly: lapack_diag_general_non_sym is declared with 7 arguments,
+  ! it has no beta, so that it cannot be called with the 8 arguments needed here
+  allocate(WORK(1))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dggev('V', 'V', n, Aw, n, Bw, n, WR, WI, beta, VL, n, VR, n, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dggev failed !!',INFO
+    stop
+  endif
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dggev('V', 'V', n, Aw, n, Bw, n, WR, WI, beta, VL, n, VR, n, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dggev failed !!',INFO
+    stop
+  endif
+  deallocate( WORK, Aw, Bw )
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  i = 1
+  thr    = 1.d-10
+  n_real_eigv = 0
+  do while (i.le.n) 
+    if( dabs(WI(i)).gt.thr ) then
+      ! complex eigenvalues come as conjugate pairs stored in (i, i+1)
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) , Im(i)  ', WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+      print*, 'Re(i+1),Im(i+1)',WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    else  
+      n_real_eigv += 1
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    endif
+  enddo
+  call dsort(eigval, iorder, n)
+
+  ! eigenvectors in the order of the sorted eigenvalues
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( iorder, WR, WI, beta, VL, VR )
+end subroutine non_hrmt_generalized_real_im
+
+! ---
+
+subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns ALL the EIGENVALUES sorted by their REAL part and the corresponding LEFT/RIGHT eigenvectors 
+  ! of a non hermitian matrix A(n,n); the eigenvectors are passed to impose_biorthog_qr when they are not bi-orthogonal
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision, allocatable :: eigval_sorted(:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !                  diagonalization of a copy of A
+
+  print *, 'Computing the left/right eigenvectors ...'
+
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+  Aw(:,:) = A(:,:)
+
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  allocate( eigval_sorted(n), iorder(n) )
+
+  n_good = 0
+  thr    = 1.d-10
+
+  do i = 1, n
+
+    iorder(i) = i
+    eigval_sorted(i) = WR(i)
+
+    if(dabs(WI(i)) .gt. thr) then
+      print*, ' Found an imaginary component to eigenvalue on i = ', i
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    else
+      n_good += 1
+    endif
+
+  enddo
+
+  n_real_eigv = n_good 
+
+  call dsort(eigval_sorted, iorder, n)
+  ! eigval must follow the same permutation (iorder) as the eigenvectors
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    eigval(i) = eigval_sorted(i)
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( eigval_sorted, iorder )
+  deallocate( WR, WI )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  ! accu_nd = square root of the sum of the squared off-diagonal elements of S
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    ! L x R is already bi-orthogonal
+
+    !print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    !print *, ' L & T bi-orthogonality: not imposed yet'
+    !print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig_fullvect
+
+! ---
+
+
+subroutine split_matrix_degen(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutine that splits the quasi-degenerate DIAGONAL elements of Aw(n,n), i.e. those closer than shift :
+ ! in a set of n_degen such elements, (j - n_degen/2) * shift is added to the j-th one (j = 0, 1, ...)
+ ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS
+ END_DOC
+ double precision,intent(inout) :: Aw(n,n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer :: i,j,n_degen
+ logical :: keep_on
+ i=1
+ do while(i.lt.n)
+  if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then ! a degenerate set starts at the element i
+   j=1
+   keep_on = .True.
+   do while(keep_on) ! first pass : count the elements closer than shift to Aw(i,i)
+    if(i+j.gt.n)then ! end of the matrix
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then
+     j+=1
+    else
+     keep_on=.False.
+     exit
+    endif
+   enddo
+   n_degen = j ! size of the set, n_degen/2 below is an integer division
+   j=0
+   keep_on = .True.
+   do while(keep_on) ! second pass : shift the elements as long as two NEIGHBOURS are closer than shift
+    if(i+j+1.gt.n)then ! the neighbour i+j+1 does not exist
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then
+     Aw(i+j,i+j) += (j-n_degen/2) * shift
+     j+=1
+    else 
+     keep_on = .False. ! NOTE(review): this pass is not bounded by n_degen -- confirm it cannot run past the set
+     exit
+    endif
+   enddo
+   Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift
+   i+=n_degen
+  else ! no degeneracy between i and i+1
+   i+=1
+  endif
+ enddo
+
+end
+
+subroutine give_degen(a,n,shift,list_degen,n_degen_list)
+ implicit none
+ BEGIN_DOC
+ ! returns n_degen_list :: the number of degenerate SETS of elements (i.e. with |A(i)-A(i+1)| below shift)
+ !
+ ! for each of these sets, list_degen(1,i) = first degenerate element of the set i, 
+ !
+ !                         list_degen(2,i) = last degenerate element of the set i.
+ END_DOC
+ double precision,intent(in) :: A(n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer, intent(out) :: list_degen(2,n),n_degen_list
+ integer :: i,j,n_degen,k
+ logical :: keep_on
+ double precision,allocatable :: Aw(:)
+ list_degen = -1 ! the entries which are not used keep this value
+ allocate(Aw(n))
+ Aw = A ! A is not modified : the shifts below are applied to this copy
+ i=1
+ k = 0 ! number of sets found so far
+ do while(i.lt.n)
+  if(dabs(Aw(i)-Aw(i+1)).lt.shift)then ! a new set starts at the element i
+   k+=1
+   j=1
+   list_degen(1,k) = i
+   keep_on = .True.
+   do while(keep_on) ! first pass : count the elements closer than shift to Aw(i)
+    if(i+j.gt.n)then ! end of the array
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i)-Aw(i+j)).lt.shift)then
+     j+=1
+    else
+     keep_on=.False.
+     exit
+    endif
+   enddo
+   n_degen = j ! size of the set, n_degen/2 below is an integer division
+   list_degen(2,k) = list_degen(1,k)-1 + n_degen
+   j=0
+   keep_on = .True.
+   do while(keep_on) ! second pass : shift the copy as long as two NEIGHBOURS are closer than shift
+    if(i+j+1.gt.n)then ! the neighbour i+j+1 does not exist
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i+j)-Aw(i+j+1)).lt.shift)then
+     Aw(i+j) += (j-n_degen/2) * shift
+     j+=1
+    else 
+     keep_on = .False. ! NOTE(review): this pass is not bounded by n_degen, it may alter elements tested later -- confirm
+     exit
+    endif
+   enddo
+   Aw(i+n_degen-1) += (n_degen-1-n_degen/2) * shift
+   i+=n_degen
+  else ! no degeneracy between i and i+1
+   i+=1
+  endif
+ enddo
+ n_degen_list = k
+
+end
+
+subroutine cancel_small_elmts(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutine that sets to zero all the elements of the matrix Aw(n,n)
+ !
+ ! whose absolute value is strictly smaller than shift
+ END_DOC
+ integer, intent(in) :: n
+ double precision,intent(in)    :: shift
+ double precision,intent(inout) :: Aw(n,n)
+ integer :: irow,icol
+ ! column-by-column sweep over the whole matrix
+ do icol = 1, n
+  do irow = 1, n
+   ! the other elements are left untouched
+   if(dabs(Aw(irow,icol)).lt.shift) Aw(irow,icol) = 0.d0
+  enddo
+ enddo
+end
+
+subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd)
+ implicit none
+ BEGIN_DOC
+! returns the overlap matrix S = Leigvec^T Reigvec 
+!
+! and accu_nd, the square root of the sum of the squared off-diagonal elements of S
+ END_DOC
+ integer, intent(in) :: n
+ double precision,intent(in) :: reigvec(n,n),leigvec(n,n)
+ double precision, intent(out) :: S(n,n),accu_nd
+ integer :: irow,icol
+ double precision :: sum_sq
+
+  ! S(i,j) = sum_k leigvec(k,i) * reigvec(k,j)
+  call dgemm( 'T', 'N', n, n, n, 1.d0, leigvec, n, reigvec, n, 0.d0, S, n )
+  ! off-diagonal elements only, accumulated column by column
+  sum_sq = 0.d0
+  do icol = 1, n
+    do irow = 1, n
+      if(irow.eq.icol) cycle
+      sum_sq = sum_sq + S(irow,icol) * S(irow,icol)
+    enddo
+  enddo
+  accu_nd = dsqrt(sum_sq)
+
+end
diff --git a/src/non_hermit_dav/gram_schmit.irp.f b/src/non_hermit_dav/gram_schmit.irp.f
new file mode 100644
index 00000000..520661b8
--- /dev/null
+++ b/src/non_hermit_dav/gram_schmit.irp.f
@@ -0,0 +1,56 @@
+subroutine bi_ortho_gram_schmidt(wi,vi,n,ni,wk,wk_schmidt)
+ implicit none
+ BEGIN_DOC
+! you enter with a set of "ni" BI-ORTHONORMAL vectors of length "n" 
+!
+! vi(j,i) = <j|vi>, wi(j,i) = <j|wi>, <vi|wj> = delta_{ij} S_ii, S_ii =<vi|wi>
+!
+! and a vector wk(j) = <j|wk>
+!
+! you go out with a vector wk_schmidt(j) = <j|wk_schmidt> 
+!
+! which is Gram-Schmidt orthogonalized with respect to the "vi"
+!
+! <vi|wk_schmidt> = 0 
+!
+! |wk_schmidt> = |wk> - \sum_{i=1}^ni (<vi|wk>/<vi|wi>) |wi> 
+!
+! according to Eq. (5), (6) of Computers Structures, Vol 56, No. 4, pp 605-613, 1995
+!
+! https://doi.org/10.1016/0045-7949(94)00565-K
+!
+! the result is finally multiplied by 1/sqrt(u_dot_u(wk_schmidt,n))
+ END_DOC
+ integer, intent(in) :: n,ni
+ double precision, intent(in) :: wi(n,ni),vi(n,ni),wk(n)
+ double precision, intent(out):: wk_schmidt(n)
+
+ ! external dot-product functions
+ double precision :: u_dot_v,u_dot_u
+ double precision :: overlap_ii,proj,inv_norm
+ integer :: ivec
+
+ ! start from the input vector
+ wk_schmidt(1:n) = wk(1:n)
+
+ do ivec = 1, ni
+
+  ! S_ii = <vi|wi>
+  overlap_ii = u_dot_v(vi(1,ivec),wi(1,ivec),n)
+
+  ! <vi|wk> / S_ii : the INPUT wk is projected, not the partially updated wk_schmidt
+  proj = u_dot_v(vi(1,ivec),wk,n)
+  proj = proj / overlap_ii
+
+  ! |wk_schmidt> <- |wk_schmidt> - proj |wi>
+  wk_schmidt(1:n) = wk_schmidt(1:n) - proj * wi(1:n,ivec)
+
+ enddo
+
+ ! final rescaling
+ ! NOTE(review): nothing protects against u_dot_u = 0 (or S_ii = 0 above) -- confirm with the callers
+ inv_norm = u_dot_u(wk_schmidt,n)
+ inv_norm = 1.d0/dsqrt(inv_norm)
+ wk_schmidt(1:n) = inv_norm * wk_schmidt(1:n)
+
+end
diff --git a/src/non_hermit_dav/htilde_mat.irp.f b/src/non_hermit_dav/htilde_mat.irp.f
new file mode 100644
index 00000000..6d5101ac
--- /dev/null
+++ b/src/non_hermit_dav/htilde_mat.irp.f
@@ -0,0 +1,93 @@
+BEGIN_PROVIDER [ integer, n_mat]
+ implicit none
+ n_mat = 2 ! dimension of h_non_hermit and of the arrays provided together with it
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, h_non_hermit, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, h_non_hermit_transp, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, reigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, leigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, eigval_ht, (n_mat)]
+&BEGIN_PROVIDER [ integer, n_real_ht, (n_mat)]
+ implicit none
+ BEGIN_DOC
+ ! h_non_hermit is read row by row on the unit 33, h_non_hermit_transp is its transpose
+ ! leigvec_ht, reigvec_ht, eigval_ht and n_real_ht(1) are the outputs of non_hrmt_real_diag
+ END_DOC
+ integer :: i
+ do i = 1, n_mat
+  read(33,*)h_non_hermit(i,1:n_mat)
+ enddo
+ print*,''
+ print*,'H_mat '
+ print*,''
+ do i = 1, n_mat
+  write(*,'(1000(F16.10,X))')h_non_hermit(i,:)
+ enddo
+ h_non_hermit_transp = transpose(h_non_hermit)
+ n_real_ht = 0 ! only n_real_ht(1) is set by non_hrmt_real_diag
+ ! non_hrmt_real_diag(n, A, leigvec, reigvec, ...) : the LEFT eigenvectors come first
+ call non_hrmt_real_diag(n_mat,h_non_hermit,leigvec_ht,reigvec_ht,n_real_ht(1),eigval_ht)
+END_PROVIDER 
+
+
+subroutine hcalc_r_tmp(v,u,N_st,sze) ! v = H u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H
+  !
+  ! Here, it is done with the matrix h_non_hermit
+  !
+  ! Computes $v = H | u \rangle$ for the N_st vectors u(:,istate) of length sze
+  !
+  ! and prints the first resulting vector v(:,1)
+  !
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: irow,jcol,kst
+
+  v(:,:) = 0.d0
+  do kst = 1, N_st
+   do jcol = 1, sze
+    ! contribution of the column jcol of the matrix to the whole vector at once
+    v(1:sze,kst) = v(1:sze,kst) + h_non_hermit(1:sze,jcol) * u(jcol,kst)
+   enddo
+  enddo
+
+  print*,'HU'
+  do irow = 1, sze
+   print*,v(irow,1)
+  enddo
+end
+
+subroutine hcalc_l_tmp(v,u,N_st,sze) ! v = H^\dagger u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H^\dagger
+  !
+  ! Here, it is done with the matrix h_non_hermit_transp
+  !
+  ! Computes $v = H^\dagger | u \rangle$ for the N_st vectors u(:,istate) of length sze
+  !
+  ! and prints the first resulting vector v(:,1)
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: irow,jcol,kst
+
+  v(:,:) = 0.d0
+  do kst = 1, N_st
+   do jcol = 1, sze
+    ! contribution of the column jcol of the matrix to the whole vector at once
+    v(1:sze,kst) = v(1:sze,kst) + h_non_hermit_transp(1:sze,jcol) * u(jcol,kst)
+   enddo
+  enddo
+
+  print*,'HU'
+  do irow = 1, sze
+   print*,v(irow,1)
+  enddo
+end
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
new file mode 100644
index 00000000..0d652af4
--- /dev/null
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -0,0 +1,2907 @@
+subroutine lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  ! The real part of the matrix A can be written as A = VR D VL^T
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+
+  allocate( Atmp(n,n) ) ! dgeev works on this copy, A is left untouched
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0)then ! a negative INFO (illegal argument) would leave WORK(1) undefined
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK)
+
+end subroutine lapack_diag_non_sym
+
+
+subroutine non_sym_diag_inv_right(n,A,leigvec,reigvec,n_real_eigv,eigval)
+ implicit none
+ BEGIN_DOC
+! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+!
+! of a non hermitian matrix A(n,n)
+!
+! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+!
+! only the first n_real_eigv entries of eigval and columns of leigvec/reigvec are meaningful,
+!
+! the remaining ones are set to zero
+!
+! before the diagonalization, two diagonal elements closer than 1.d-10 are moved apart by
+!
+! +/- 1.d-10 and the two off-diagonal elements coupling them are set to zero
+!
+! the program STOPS if the selected left and right eigenvectors are not bi-orthogonal
+ END_DOC
+
+ integer, intent(in) :: n
+ double precision, intent(in) :: A(n,n)
+ double precision, intent(out) :: reigvec(n,n),leigvec(n,n),eigval(n)
+ integer, intent(out) :: n_real_eigv
+ double precision, allocatable :: Aw(:,:)
+ double precision, allocatable :: WR(:),WI(:),Vl(:,:),VR(:,:),S(:,:)
+ integer :: i,j
+ integer :: n_good
+ integer, allocatable :: list_good(:), iorder(:)
+ double precision :: thr
+ double precision :: accu_d,accu_nd
+
+ print*,'Computing the left/right eigenvectors ...'
+ thr = 1.d-10
+ ! Eigvalue(n) = WR(n) + i * WI(n)
+ allocate(WR(n),WI(n),VL(n,n),VR(n,n),Aw(n,n))
+ Aw = A
+ do i = 1, n
+  do j = i+1, n
+   if(dabs(Aw(j,j)-Aw(i,i)).lt.thr)then
+     ! quasi-degenerate diagonal elements: move them apart and decouple them
+     Aw(j,j)+= thr
+     Aw(i,i)-= thr
+     Aw(j,i) = 0.d0
+     Aw(i,j) = Aw(j,i)
+   endif
+  enddo
+ enddo
+
+ call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+
+ ! You track the real eigenvalues 
+ n_good = 0
+ do i = 1, n
+  print*,'Im part of lambda = ',dabs(WI(i))
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+  else
+   print*,'Found an imaginary component to eigenvalue'
+   print*,'Re(i) + Im(i)',WR(i),WI(i)
+   write(*,'(100(F10.5,X))')VR(:,i)
+   ! the column i+1 does not exist for the last eigenvalue
+   if(i.lt.n) write(*,'(100(F10.5,X))')VR(:,i+1)
+   write(*,'(100(F10.5,X))')VL(:,i)
+   if(i.lt.n) write(*,'(100(F10.5,X))')VL(:,i+1)
+  endif
+ enddo
+
+ allocate(list_good(n_good),iorder(n_good))
+ ! what lies beyond the n_real_eigv selected eigenpairs is set to zero
+ eigval(:)    = 0.d0
+ reigvec(:,:) = 0.d0
+ leigvec(:,:) = 0.d0
+ n_good = 0
+ do i = 1, n
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+   list_good(n_good) = i
+   eigval(n_good) = WR(i)
+  endif
+ enddo
+
+ n_real_eigv = n_good 
+ do i = 1, n_good
+  iorder(i) = i
+ enddo
+
+ ! You sort the real eigenvalues 
+ call dsort(eigval,iorder,n_good)
+ do i = 1, n_real_eigv
+  do j = 1, n
+   reigvec(j,i) = VR(j,list_good(iorder(i)))
+   leigvec(j,i) = VL(j,list_good(iorder(i)))
+  enddo
+ enddo
+
+ deallocate(list_good,iorder)
+ deallocate(WR,WI,VL,VR,Aw)
+
+ allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR : the n_real_eigv selected vectors are contracted over their n components
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0           &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+   do i = 1,n_real_eigv
+    write(*,'(100(F10.5,X))')S(:,i)
+   enddo
+
+  ! accu_d (squared diagonal elements) is accumulated but not tested
+  accu_nd = 0.d0
+  accu_d = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+       accu_d += S(j,i) * S(j,i)
+      else
+       accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  deallocate( S )
+
+  print*,'accu_nd = ',accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    ! L x R is already bi-orthogonal
+    return
+  else
+   print*,'PB with bi-orthonormality!!'
+   stop
+  endif
+end
+
+subroutine lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  !
+  ! You enter with a general non hermitian matrix A(n,n), which is left untouched
+  !
+  ! It is diagonalized with the LAPACK driver DGEEVX, called with BALANC = 'B' and SENSE = 'B'
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  ! The program is stopped if DGEEVX returns a non-zero INFO
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: ipass
+  ! outputs of the balancing and of the conditioning, not returned to the caller
+  integer                       :: ilo_bal, ihi_bal
+  integer                       :: lwork_cur, ierr
+  double precision              :: norm_bal
+  integer,          allocatable :: iwrk(:)
+  double precision, allocatable :: wrk(:), scal(:), cond_e(:), cond_v(:)
+  double precision, allocatable :: Acopy(:,:)
+
+  ! ---
+
+  ! the diagonalization is done on a copy of A
+  allocate( Acopy(n,n) )
+  Acopy(1:n,1:n) = A(1:n,1:n)
+
+  allocate( scal(n), cond_e(n), cond_v(n), iwrk(2*n-2) )
+
+  ! pass 1 : workspace query (lwork_cur = -1), wrk(1) then holds the optimal size of wrk
+  ! pass 2 : actual diagonalization with that workspace
+  allocate( wrk(1) )
+  lwork_cur = -1
+
+  do ipass = 1, 2
+
+    call dgeevx( 'B', 'V', 'V', 'B'                        &  ! BALANC, JOBVL, JOBVR, SENSE
+               , n, Acopy, n                               &  ! MATRIX TO DIAGONALIZE
+               , WR, WI                                    &  ! REAL AND IMAGINARY PART OF EIGENVALUES
+               , VL, n, VR, n                              &  ! LEFT AND RIGHT EIGENVECTORS
+               , ilo_bal, ihi_bal, scal, norm_bal          &  ! OUTPUTS OF THE BALANCING
+               , cond_e, cond_v                            &  ! CONDITION NUMBERS
+               , wrk, lwork_cur, iwrk, ierr )
+
+    ! same check after the query and after the diagonalization
+    if( ierr.ne.0 ) then
+      print *, 'dgeevx failed !!', ierr
+      stop
+    endif
+
+    ! resize the workspace before the second pass
+    if( ipass.eq.1 ) then
+      lwork_cur = max(int(wrk(1)), 1)
+      deallocate( wrk )
+      allocate( wrk(lwork_cur) )
+    endif
+
+  enddo
+
+  ! ---
+  deallocate( Acopy )
+  deallocate( wrk, scal, cond_e, cond_v, iwrk )
+
+end subroutine lapack_diag_non_sym_new
+
+! ---
+
+subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And ONLY the right eigenvectors VR (dgeev is called with JOBVL = 'N')
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:)
+
+  lda  = n
+  ldvl = 1 ! the left eigenvectors are not computed: VL is a dummy (1,1) array
+  ldvr = n
+
+  allocate( Atmp(n,n), VL(1,1) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0)then ! a negative INFO (illegal argument) would leave WORK(1) undefined
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK, VL)
+
+end subroutine lapack_diag_non_sym_right
+
+! ---
+
+subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  ! the entries of eigval and the columns of leigvec/reigvec beyond n_real_eigv are set to zero
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j, n_good
+  double precision              :: thr, threshold, accu_d, accu_nd
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:), S(:,:)
+
+  print*, ' Computing the left/right eigenvectors with lapack ...'
+
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  Aw = A
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! ---
+  ! You track the real eigenvalues 
+
+  thr = 1d-15
+
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+    else
+      print*, ' Found an imaginary component to eigenvalue'
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  ! what lies beyond the n_real_eigv selected eigenpairs is set to zero
+  eigval(:)    = 0.d0
+  reigvec(:,:) = 0.d0
+  leigvec(:,:) = 0.d0
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues 
+  call dsort(eigval, iorder, n_good)
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( WR, WI, VL, VR, Aw )
+
+  ! ---
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR : the n_real_eigv selected vectors are contracted over their n components
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0           &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1)  &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  deallocate( S )
+
+  threshold = 1.d-15
+  if( (accu_nd .gt. threshold) .or. (dabs(accu_d-dble(n_real_eigv)) .gt. threshold) ) then
+
+    print*, ' sum of off-diag S elements = ', accu_nd
+    print*, ' Should be zero '
+    print*, ' sum of     diag S elements = ', accu_d
+    print*, ' Should be ',n_real_eigv
+    print*, ' Not bi-orthonormal !!'
+    print*, ' Notice that if you are interested in ground state it is not a problem :)'
+  endif
+
+end subroutine non_hrmt_real_diag
+
+! ---
+
+subroutine lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! Diagonalizes the general (non hermitian) real matrix A(n,n) with LAPACK dgeev.
+  !
+  ! WARNING: B(n,n) is accepted but never used here: the standard problem A x = w x
+  ! is solved, NOT the generalized problem A x = w B x.
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues
+  !
+  ! Eigvalue(n) = (WR(n) + i * WI(n))
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: WORK(:)
+  double precision, allocatable :: Atmp(:,:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+  ! dgeev overwrites its input matrix: work on a copy so that A stays intact
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+  ! workspace query (LWORK = -1): the optimal size comes back in WORK(1)
+  allocate(WORK(1))
+  LWORK = -1
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1)
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, Atmp )
+
+end subroutine lapack_diag_general_non_sym
+
+! ---
+
+subroutine non_hrmt_general_real_diag(n, A, B, reigvec, leigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! Returns the sorted REAL eigenvalues only, with the corresponding LEFT/RIGHT eigenvectors,
+  !
+  ! intended for the generalized problem of the non hermitian matrices A(n,n) and B(n,n):
+  !
+  ! A reigvec = eigval * B * reigvec ,  (A)^\dagger leigvec = eigval * B * leigvec
+  !
+  ! NOTE: lapack_diag_general_non_sym currently ignores B (plain dgeev on A), so B acts as the identity.
+  !
+  ! n_real_eigv = number of real eigenvalues (|Im| < 1.d-12), possibly smaller than "n"; only eigval(1:n_real_eigv) is set
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  integer, allocatable          :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, parameter   :: thr_wi = 1.d-12
+
+  print*,'Computing the left/right eigenvectors ...'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n))
+  ! A and B are handed over as they are: the LAPACK wrapper declares them intent(in)
+  ! and works on its own copy of A, so no local duplicate is needed here.
+
+  call lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  ! You track the real eigenvalues (imaginary part below thr_wi)
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)) .lt. thr_wi) then
+      n_good += 1
+    else
+      print*,'Found an imaginary component to eigenvalue'
+      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)) .lt. thr_wi) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues; iorder keeps track of the permutation
+  call dsort(eigval, iorder, n_good)
+  print*,'n_real_eigv = ', n_real_eigv
+  print*,'n           = ', n
+  do i = 1, n_real_eigv
+    print*,i,'eigval(i) = ', eigval(i) 
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+end subroutine non_hrmt_general_real_diag
+
+! ---
+
+subroutine impose_biorthog_qr(m, n, thr_d, thr_nd, Vl, Vr)
+  ! Makes the n column vectors of Vl(m,n) and Vr(m,n) bi-orthonormal (Vl.T x Vr = I) using the QR factorization of S = Vl.T x Vr.
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n)
+  ! thr_d / thr_nd: tolerances on the diagonal / off-diagonal part of S under which the QR step is skipped
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision                :: accu_nd, accu_d
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), R(:,:), tmp(:,:)
+
+  ! ---
+  ! sign / normalization fix of the (left,right) pairs; never stops here since stop_ifnot = .false.
+  call check_biorthog_binormalize(m, n, Vl, Vr, thr_d, thr_nd, .false.)
+  
+  ! ---
+  ! S = Vl.T x Vr
+  allocate(S(n,n))
+  call dgemm( 'T', 'N', n, n, m, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  ! accu_d = trace of S, accu_nd = Frobenius norm of the off-diagonal part of S
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  ! early exit when S is already the identity within the tolerances
+  if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then
+    print *, ' bi-orthogonal vectors without QR !'
+    deallocate(S)
+    return
+  endif
+
+  ! -------------------------------------------------------------------------------------
+  !                           QR factorization of S: S = Q x R
+
+
+  print *, ' apply QR decomposition ...'
+
+  allocate( TAU(n), WORK(1) )
+  ! workspace query (LWORK = -1): the optimal size comes back in WORK(1)
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! save the factored matrix: its upper triangle is R (the strict lower triangle is zeroed after the inversion)
+  allocate( R(n,n) )
+  R(:,:) = S(:,:)
+
+  ! get Q explicitly (S is overwritten by Q); same two-step workspace query as above
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orthog left & right vectors:
+  !                                           Vr' = Vr x inv(R)
+  !                                           Vl' = inv(Q) x Vl =  Q.T   x Vl
+
+  ! Q.T x Vl, where Q = S
+  ! tmp(n,m) = Q.T x Vl.T = (Vl x Q).T, copied back transposed into Vl
+  allocate( tmp(n,m) )
+  call dgemm( 'T', 'T', n, m, n, 1.d0        &
+            , S, size(S, 1), Vl, size(Vl, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  do i = 1, n
+    do j = 1, m
+      Vl(j,i) = tmp(i,j)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  ! ---
+
+  ! inv(R): in-place inversion of the upper triangle of R
+  !print *, ' inversing upper triangular matrix ...'
+  call dtrtri("U", "N", n, R, n, INFO)
+  if(INFO .ne. 0) then
+    print*,'dtrtri failed !!', INFO
+    stop
+  endif
+  !print *, ' inversing upper triangular matrix OK'
+  ! clear the strict lower triangle so that R holds exactly inv(R)
+  do i = 1, n-1
+    do j = i+1, n
+      R(j,i) = 0.d0
+    enddo
+  enddo
+
+  !print *, ' inv(R):'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') R(i,:)
+  !enddo
+
+  ! Vr x inv(R)
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), R, size(R, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate( R )
+
+  do i = 1, n
+    do j = 1, m
+      Vr(j,i) = tmp(j,i)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  return
+end subroutine impose_biorthog_qr
+
+! ---
+
+subroutine impose_biorthog_lu(m, n, Vl, Vr, S)
+  ! Transforms Vl(m,n), Vr(m,n) as Vr' = Vr x inv(U), Vl' = inv(L) x inv(P) x Vl, from the LU factorization S = P x L x U.
+  implicit none 
+  integer, intent(in)             :: m, n
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n), S(n,n)
+  ! S(n,n): matrix to factorize (NOTE(review): presumably the overlap Vl.T x Vr built by the caller); holds inv(U) on exit
+  integer                         :: i, j
+  integer                         :: INFO
+  double precision                :: nrm
+  integer,          allocatable   :: IPIV(:)
+  double precision, allocatable   :: L(:,:), tmp(:,:), vectmp(:)
+  !double precision, allocatable   :: T(:,:), ll(:,:), rr(:,:), tt(:,:)
+
+  !allocate( T(n,n) )
+  !T(:,:) = S(:,:)
+
+  print *, ' apply LU decomposition ...'
+
+  ! -------------------------------------------------------------------------------------
+  !                           LU factorization of S: S = P x L x U
+
+  allocate( IPIV(n) )
+
+  call dgetrf(n, n, S, n, IPIV, INFO)
+  if(INFO .ne. 0) then
+    print*, 'dgetrf failed !!', INFO
+    stop
+  endif
+
+  ! check | S - P x L x U |
+  !allocate( ll(n,n), rr(n,n), tmp(n,n) )
+  !ll = S
+  !rr = S
+  !do i = 1, n-1
+  !  ll(i,i) = 1.d0
+  !  do j = i+1, n
+  !    ll(i,j) = 0.d0
+  !    rr(j,i) = 0.d0
+  !  enddo
+  !enddo
+  !ll(n,n) = 1.d0
+  !call dgemm( 'N', 'N', n, n, n, 1.d0          &
+  !          , ll, size(ll, 1), rr, size(rr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  ! deallocate(ll, rr)
+  !allocate( vectmp(n) )
+  !do j = n-1, 1, -1
+  !  i = IPIV(j)
+  !  if(i.ne.j) then
+  !    print *, j, i
+  !    vectmp(:) = tmp(i,:)
+  !    tmp(i,:)  = tmp(j,:)
+  !    tmp(j,:)  = vectmp(:)
+  !  endif
+  !enddo
+  !deallocate( vectmp )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i) - T(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R - S| =', nrm
+  !stop
+
+  ! ------
+  ! inv(L)
+  ! ------
+  ! copy the LU factors, invert the unit lower triangle in place, then reset the diagonal to 1 and clear the upper part
+  allocate( L(n,n) )
+  L(:,:) = S(:,:)
+
+  call dtrtri("L", "U", n, L, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+  do i = 1, n-1
+    L(i,i) = 1.d0
+    do j = i+1, n
+      L(i,j) = 0.d0
+    enddo
+  enddo
+  L(n,n) = 1.d0
+
+  ! ------
+  ! inv(U): computed in place in the upper triangle of S
+  ! ------
+  
+  call dtrtri("U", "N", n, S, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+  ! clear the strict lower triangle (it still contains the L factor) so that S holds exactly inv(U)
+  do i = 1, n-1
+    do j = i+1, n
+      S(j,i) = 0.d0
+    enddo
+  enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orthog left & right vectors:
+  !                                           Vr' = Vr x inv(U)
+  !                                           Vl' = inv(L) x inv(P) x Vl
+  ! the row interchanges stored in IPIV are applied, in reverse order, as column swaps of inv(L)
+  ! inv(P) x Vl
+  allocate( vectmp(n) )
+  do j = n-1, 1, -1
+    i = IPIV(j)
+    if(i.ne.j) then
+      vectmp(:) = L(:,j)
+      L(:,j)    = L(:,i)
+      L(:,i)    = vectmp(:)
+    endif
+  enddo
+  deallocate( vectmp )
+
+  ! Vl' = Vl x L.T, where L now holds inv(L) with its columns permuted
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'T', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), L, size(L, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(L)
+
+  Vl = tmp
+  deallocate(tmp)
+
+  ! ---
+
+  ! Vr x inv(U)
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), S, size(S, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  Vr = tmp
+  deallocate(tmp)
+
+  !allocate( tmp(n,n) )
+  !call dgemm( 'T', 'N', n, n, m, 1.d0          &
+  !          , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R| =', nrm
+  !stop
+
+  return
+end subroutine impose_biorthog_lu
+
+! ---
+
+subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, stop_ifnot)
+  ! Checks that the m columns of reigvec / leigvec are right / left eigenvectors of A(n,n): A x R = R x diag(eigval), A.T x L = L x diag(eigval).
+  implicit none
+  integer,          intent(in)  :: n, m
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: A(n,n), eigval(m), leigvec(n,m), reigvec(n,m), thr_diag, thr_norm
+  ! thr_diag: max relative L1 residual; thr_norm: max |sum of squared norms - m|; the routine stops only if stop_ifnot
+  integer                       :: i, j
+  double precision              :: tmp, tmp_abs, tmp_nrm, tmp_rel, tmp_dif
+  double precision              :: V_nrm, U_nrm
+  double precision, allocatable :: Mtmp(:,:)
+
+  allocate( Mtmp(n,m) )
+  
+  ! --- right eigenvectors: Mtmp = A x reigvec
+
+  Mtmp = 0.d0
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+    
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * reigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + reigvec(i,j) * reigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
+
+  enddo
+  ! tiny absolute residual: force the relative test to pass (also avoids dividing by a vanishing tmp_nrm)
+  if(tmp_abs.lt.10.d-10)then
+   tmp_rel = thr_diag/10.d0
+  else
+   tmp_rel = tmp_abs / tmp_nrm
+  endif
+  tmp_dif = dabs(V_nrm - dble(m))
+
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in right-eigenvectors'
+    print *, ' err tol   = ',thr_diag, thr_norm
+    print *, '(tmp_rel .gt. thr_diag) = ',(tmp_rel .gt. thr_diag)
+    print *, '(tmp_dif .gt. thr_norm) = ',(tmp_dif .gt. thr_norm)
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CR norm   = ', V_nrm 
+    stop
+  endif
+
+  ! --- left eigenvectors: Mtmp = A.T x leigvec
+
+  Mtmp = 0.d0
+  call dgemm( 'T', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), leigvec, size(leigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * leigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + leigvec(i,j) * leigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
+  enddo
+  ! V_nrm now refers to the left eigenvectors: the norm deviation must be recomputed, not reused from the right ones
+  tmp_dif = dabs(V_nrm - dble(m))
+  if(tmp_abs.lt.10.d-10)then
+   tmp_rel = thr_diag/10.d0
+  else
+   tmp_rel = tmp_abs / tmp_nrm
+  endif
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in left-eigenvectors'
+    print *, ' err tol   = ',thr_diag, thr_norm
+    print *, '(tmp_rel .gt. thr_diag) = ',(tmp_rel .gt. thr_diag)
+    print *, '(tmp_dif .gt. thr_norm) = ',(tmp_dif .gt. thr_norm)
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CL norm   = ', V_nrm 
+    stop
+  endif
+
+  ! ---
+
+  deallocate( Mtmp )
+
+end subroutine check_EIGVEC
+
+! ---
+
+subroutine check_degen(n, m, eigval, leigvec, reigvec)
+  ! Replaces every pair of left/right vectors with eigenvalues closer than 1d-6 by unit vectors, then checks the off-diagonal overlap.
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: eigval(m)
+  double precision, intent(inout) :: leigvec(n,m), reigvec(n,m)
+  ! stops (with a message) if the off-diagonal Frobenius norm of leigvec.T x reigvec is not below 1d-8
+  integer                         :: i, j
+  double precision                :: ei, ej, de, de_thr, accu_nd
+  double precision, allocatable   :: S(:,:)
+
+  de_thr = 1d-6
+
+  do i = 1, m-1
+    ei = eigval(i)
+
+    do j = i+1, m
+      ej = eigval(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+
+        ! degenerate pair (i,j): left vectors become the unit vectors e_i, e_j
+        leigvec(:,i) = 0.d0
+        leigvec(:,j) = 0.d0
+        leigvec(i,i) = 1.d0
+        leigvec(j,j) = 1.d0
+
+        ! ... and so do the right vectors
+        reigvec(:,i) = 0.d0
+        reigvec(:,j) = 0.d0
+        reigvec(i,i) = 1.d0
+        reigvec(j,j) = 1.d0
+
+      endif
+
+    enddo
+  enddo
+
+  ! ---
+
+  allocate( S(m,m) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', m, m, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+  if( accu_nd .lt. 1d-8 ) then
+    print *, ' check_degen: L & T bi-orthogonality: ok'
+    print *, ' accu_nd = ', accu_nd
+    return
+  endif
+  print *, ' check_degen: L & T bi-orthogonality: failed'
+  print *, ' accu_nd = ', accu_nd
+  stop
+
+end subroutine check_degen
+
+! ---
+
+subroutine impose_weighted_orthog_svd(n, m, W, C)
+
+  BEGIN_DOC
+  ! Orthonormalizes the m columns of C(n,m) in the metric W(n,n): C <-- C x U x sigma^-1/2,
+  ! with (U, sigma) from the SVD of C.T x W x C. W is only read.
+  ! Stops if any singular value is <= 1.d-6 (linear dependencies).
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m), W(n,n)
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  ! C.T x W x C
+  allocate(S(m,m))
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , C, size(C, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  ! D <-- sigma^-1/2; any singular value below the threshold aborts the run
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! debug only: overlap C.T x W x C after the transformation (expected to be the
+  ! identity when W is symmetric positive definite). Disabled together with its print,
+  ! so that no dgemm is spent on a matrix that nobody reads.
+  !allocate(S(m,m))
+  !allocate(tmp(m,n))
+  !call dgemm( 'T', 'N', m, n, n, 1.d0      &
+  !          , C, size(C, 1), W, size(W, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  !call dgemm( 'N', 'N', m, m, n, 1.d0          &
+  !          , tmp, size(tmp, 1), C, size(C, 1) &
+  !          , 0.d0, S, size(S, 1) )
+  !deallocate(tmp)
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+  !deallocate(S)
+
+  ! ---
+
+end subroutine impose_weighted_orthog_svd
+
+! ---
+
+subroutine impose_orthog_svd(n, m, C)
+
+  BEGIN_DOC
+  ! Orthonormalizes the m columns of C(n,m): C <-- C x U x sigma^-1/2,
+  ! with (U, sigma) from the SVD of the overlap C.T x C.
+  ! Stops if any singular value is <= 1.d-6 (linear dependencies).
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  allocate(S(m,m))
+
+  ! S = C.T x C
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  ! D <-- sigma^-1/2; any singular value below the threshold aborts the run
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      write(*,*) ' D(i) = ', D(i)
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! debug only: overlap S = C.T x C after the transformation (expected to be the identity).
+  ! Disabled together with its print, so that no dgemm is spent on a matrix
+  ! that nobody reads.
+  !allocate(S(m,m))
+  !call dgemm( 'T', 'N', m, m, n, 1.d0      &
+  !          , C, size(C, 1), C, size(C, 1) &
+  !          , 0.d0, S, size(S, 1) )
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+  !deallocate(S)
+  !
+
+  ! ---
+
+end subroutine impose_orthog_svd
+
+! ---
+
+subroutine impose_orthog_svd_overlap(n, m, C, overlap)
+
+  BEGIN_DOC
+  ! Orthonormalizes the m columns of C(n,m) in the metric overlap(n,n): C <-- C x U x sigma^-1/2, with (U, sigma)
+  ! from the SVD of C.T x overlap x C. Stops if any singular value is <= 1.d-6 (linear dependencies).
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(in   ) :: overlap(n,n)
+  double precision, intent(inout) :: C(n,m)
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:), Stmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  print *, ' apply SVD to orthogonalize vectors'
+
+  ! S = C.T x overlap x C
+  allocate(S(m,m), Stmp(n,m))
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , overlap, size(overlap, 1), C, size(C, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0            &
+            , C, size(C, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  ! D <-- sigma^-1/2; any singular value below the threshold aborts the run
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! debug only: S = C.T x overlap x C after the transformation; disabled with its print so no dgemm is wasted
+  !allocate(S(m,m), Stmp(n,m))
+  !call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+  !          , overlap, size(overlap, 1), C, size(C, 1) &
+  !          , 0.d0, Stmp, size(Stmp, 1) )
+  !call dgemm( 'T', 'N', m, m, n, 1.d0            &
+  !          , C, size(C, 1), Stmp, size(Stmp, 1) &
+  !          , 0.d0, S, size(S, 1) )
+  !deallocate(Stmp)
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+  !deallocate(S)
+
+end subroutine impose_orthog_svd_overlap
+
+! ---
+
+subroutine impose_orthog_GramSchmidt(n, m, C)
+
+  BEGIN_DOC
+  ! Gram-Schmidt orthonormalization, in place, of the m columns of C(n,m).
+  !
+  ! Each column is first orthogonalized against all the previous (already updated,
+  ! not yet normalized) columns; all the columns are normalized at the end.
+  !
+  ! The overlap C.T x C is printed before and after the transformation.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: irow, jvec, kvec
+  double precision                :: ovl_jk, ovl_jj, gs_fact
+  double precision, allocatable   :: ovlp_cc(:,:)
+
+  print *, ''
+  print *, ' apply Gram-Schmidt to orthogonalize & normalize vectors'
+  print *, ''
+
+  ! overlap of the input vectors
+  allocate( ovlp_cc(m,m) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0 &
+            , C, n, C, n              &
+            , 0.d0, ovlp_cc, m )
+
+  print *, ' eigenvec overlap bef Gram-Schmidt: '
+  do irow = 1, m
+    write(*, '(1000(F16.10,X))') ovlp_cc(irow,:)
+  enddo
+
+  ! ---
+
+  ! orthogonalization: C(:,k) <-- C(:,k) - <C_j|C_k> / <C_j|C_j> x C(:,j), for all j < k
+  do kvec = 2, m
+    do jvec = 1, kvec-1
+      ovl_jk = 0.d0
+      ovl_jj = 0.d0
+      do irow = 1, n
+        ovl_jk = ovl_jk + C(irow,jvec) * C(irow,kvec)
+        ovl_jj = ovl_jj + C(irow,jvec) * C(irow,jvec)
+      enddo
+      gs_fact = ovl_jk / ovl_jj
+      C(1:n,kvec) = C(1:n,kvec) - gs_fact * C(1:n,jvec)
+    enddo
+  enddo
+
+  ! normalization of every column
+  do kvec = 1, m
+    gs_fact = 0.d0
+    do irow = 1, n
+      gs_fact = gs_fact + C(irow,kvec) * C(irow,kvec)
+    enddo
+    gs_fact = dsqrt(gs_fact)
+    C(1:n,kvec) = C(1:n,kvec) / gs_fact
+  enddo
+
+  ! ---
+  ! overlap of the output vectors
+  call dgemm( 'T', 'N', m, m, n, 1.d0 &
+            , C, n, C, n              &
+            , 0.d0, ovlp_cc, m )
+
+  print *, ' eigenvec overlap aft Gram-Schmidt: '
+  do irow = 1, m
+    write(*, '(1000(F16.10,X))') ovlp_cc(irow,:)
+  enddo
+
+  deallocate( ovlp_cc )
+
+end subroutine impose_orthog_GramSchmidt
+
+! ---
+
+subroutine impose_orthog_ones(n, deg_num, C)
+
+  ! Overwrites columns of C(n,n) flagged as degenerate by unit vectors ("by hand").
+  ! For every i < n with deg_num(i) > 1, columns i .. i+deg_num(i)-1 become the unit vectors e_i .. e_(i+deg_num(i)-1);
+  ! the same is done for the blocks starting at each j > i+deg_num(i) such that deg_num(j) == deg_num(i).
+
+  implicit none
+  integer,          intent(in)    :: n
+  integer,          intent(in)    :: deg_num(n)
+  double precision, intent(inout) :: C(n,n)
+
+  integer                         :: ihead, jhead, icol, nblk
+
+  print *, ''
+  print *, ' orthogonalize vectors by hand'
+  print *, ''
+
+  do ihead = 1, n-1
+    nblk = deg_num(ihead)
+    if(nblk .le. 1) cycle
+
+    ! block starting at ihead
+    do icol = ihead, ihead+nblk-1
+      C(:,icol)    = 0.d0
+      C(icol,icol) = 1.d0
+    enddo
+
+    ! later blocks carrying the same degeneracy
+    do jhead = ihead+nblk+1, n
+      if(deg_num(jhead) .ne. nblk) cycle
+      do icol = jhead, jhead+nblk-1
+        C(:,icol)    = 0.d0
+        C(icol,icol) = 1.d0
+      enddo
+    enddo
+
+  enddo
+
+end subroutine impose_orthog_ones
+
+! ---
+
+subroutine impose_orthog_degen_eigvec(n, e0, C0)
+
+  BEGIN_DOC
+  ! Orthonormalizes, in place, the columns of C0(n,n) associated with degenerate eigenvalues.
+  !
+  ! e0(i) and e0(j) are considered degenerate when |e0(i) - e0(j)| < thr_degen_tc.
+  !
+  ! A group of m degenerate vectors headed by i is taken as the consecutive columns
+  ! i, ..., i+m-1 of C0 and is orthonormalized with impose_orthog_svd.
+  !
+  ! NOTE(review): using consecutive columns presumably relies on e0 being sorted -- to be confirmed.
+  !
+  ! Non-degenerate columns are left untouched.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: C0(n,n)
+
+  integer                         :: ihead, jnext, kcol, ndeg
+  double precision                :: e_head, thr_de
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: Cdeg(:,:)
+
+  ! ---
+  ! deg_num(i) = size of the degenerate group headed by i
+  !            = 1 if e0(i) is not degenerate
+  !            = 0 if i was absorbed in a group headed by an earlier index
+  ! ---
+
+  allocate( deg_num(n) )
+  deg_num(1:n) = 1
+
+  thr_de = thr_degen_tc
+
+  do ihead = 1, n-1
+
+    ! already absorbed in a previous group
+    if(deg_num(ihead) .eq. 0) cycle
+
+    e_head = e0(ihead)
+    do jnext = ihead+1, n
+      if(dabs(e_head - e0(jnext)) .lt. thr_de) then
+        deg_num(ihead) = deg_num(ihead) + 1
+        deg_num(jnext) = 0
+      endif
+    enddo
+
+  enddo
+
+  ! debug:
+  !do ihead = 1, n
+  !  if(deg_num(ihead) .gt. 1) then
+  !    print *, ' degen on', ihead, deg_num(ihead)
+  !  endif
+  !enddo
+
+  ! ---
+  ! orthonormalize each degenerate group: Cdeg <= Cdeg U sigma^-0.5
+  !
+  ! alternatives that were tried and are currently disabled:
+  !   call impose_orthog_ones(n, deg_num, C0)
+  !   call impose_orthog_GramSchmidt(n, ndeg, Cdeg)
+  !   Cdeg = identity on the rows ihead, ..., ihead+ndeg-1
+  ! ---
+
+  do ihead = 1, n
+
+    ndeg = deg_num(ihead)
+    if(ndeg .le. 1) cycle
+
+    ! gather the group
+    allocate( Cdeg(n,ndeg) )
+    do kcol = 1, ndeg
+      Cdeg(1:n,kcol) = C0(1:n,ihead+kcol-1)
+    enddo
+
+    call impose_orthog_svd(n, ndeg, Cdeg)
+
+    ! scatter it back
+    do kcol = 1, ndeg
+      C0(1:n,ihead+kcol-1) = Cdeg(1:n,kcol)
+    enddo
+    deallocate( Cdeg )
+
+  enddo
+
+end subroutine impose_orthog_degen_eigvec 
+
+! ---
+
+subroutine get_halfinv_svd(n, S)
+
+  BEGIN_DOC
+  ! Replaces S(n,n) by U x sigma^-1/2 x Vt, built from its SVD S = U x sigma x Vt;
+  ! singular values <= 1.d-6 are dropped and counted as linear dependencies.
+  ! Stops if the off-diagonal norm or the trace deviation of S^-1/2 x S x S^-1/2 exceeds 1.d-10.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n
+  double precision, intent(inout) :: S(n,n)
+
+  double precision, parameter     :: threshold = 1.d-6
+  integer                         :: irow, jcol, ksv, num_linear_dependencies
+  double precision                :: tr_chk, offd_chk, thresh
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+  double precision, allocatable   :: S_in(:,:), SmhS(:,:), Chk(:,:)
+
+  allocate( S_in(n,n) )
+  S_in(1:n,1:n) = S(1:n,1:n)
+
+  allocate(U(n,n), Vt(n,n), D(n))
+  call svd(S, n, U, n, D, Vt, n, n, n)
+
+  num_linear_dependencies = 0
+  do ksv = 1, n
+    if(abs(D(ksv)) <= threshold) then
+      D(ksv) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(ksv) > 0.d0)
+      D(ksv) = 1.d0 / dsqrt(D(ksv))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  ! S <-- sum_k U(:,k) x D(k) x Vt(k,:), skipping the discarded singular values
+  S(:,:) = 0.d0
+  do ksv = 1, n
+    if(D(ksv) == 0.d0) cycle
+    do jcol = 1, n
+      do irow = 1, n
+        S(irow,jcol) = S(irow,jcol) + U(irow,ksv) * D(ksv) * Vt(ksv,jcol)
+      enddo
+    enddo
+  enddo
+  deallocate(U, D, Vt)
+
+  ! Chk = ( S^-1/2 x S_in ) x S^-1/2
+  allocate( SmhS(n,n), Chk(n,n) )
+  SmhS = 0.d0
+  Chk  = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0 &
+            , S, n, S_in, n           &
+            , 0.d0, SmhS, n )
+  call dgemm( 'N', 'N', n, n, n, 1.d0 &
+            , SmhS, n, S, n           &
+            , 0.d0, Chk, n )
+
+  ! trace and off-diagonal Frobenius norm of Chk
+  thresh   = 1.d-10
+  tr_chk   = 0.d0
+  offd_chk = 0.d0
+  do jcol = 1, n
+    tr_chk = tr_chk + Chk(jcol,jcol)
+    do irow = 1, n
+      if(irow /= jcol) offd_chk = offd_chk + Chk(irow,jcol) * Chk(irow,jcol)
+    enddo
+  enddo
+  offd_chk = dsqrt(offd_chk)
+  if( offd_chk.gt.thresh .or. dabs(tr_chk-dble(n)).gt.thresh) then
+    print*, ' after S^-1/2: sum of off-diag S elements = ', offd_chk
+    print*, ' after S^-1/2: sum of     diag S elements = ', tr_chk
+    do irow = 1, n
+      write(*,'(1000(F16.10,X))') Chk(irow,:)
+    enddo
+    stop
+  endif
+  deallocate(S_in, SmhS, Chk)
+
+end subroutine get_halfinv_svd
+
+! ---
+
+subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
+  ! Fixes the sign and the normalization of the (left,right) vector pairs, then checks the bi-orthonormality of Vl.T x Vr.
+  implicit none
+  ! Vl, Vr: m column vectors of length n, modified in place; thr_d / thr_nd: tolerances on the diagonal / off-diagonal part
+  integer,          intent(in)    :: n, m
+  logical,          intent(in)    :: stop_ifnot
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(n,m), Vr(n,m)
+  ! stop_ifnot: if .true., the routine stops when the final overlap is outside the tolerances
+  integer                         :: i, j
+  double precision                :: accu_d, accu_nd, s_tmp
+  double precision, allocatable   :: S(:,:)
+
+  !print *, ' check bi-orthonormality'
+
+  ! ---
+  ! S = Vl.T x Vr
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix before:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! negative overlap S(i,i): flip the sign of the left vector i (only the diagonal of S is updated; off-diagonal terms are used squared)
+  do i = 1, m
+    if(S(i,i) .lt. 0.d0) then
+    !if( (S(i,i) + 1.d0) .lt. thr_d ) then
+      do j = 1, n
+        Vl(j,i) = -1.d0 * Vl(j,i)
+      enddo
+      !S(i,i) = 1.d0
+      S(i,i) = -S(i,i)
+    endif
+  enddo
+  ! accu_d = trace of S; accu_nd = Frobenius norm of the off-diagonal part divided by m
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
+
+  ! ---
+  ! rescaling is attempted only when the off-diagonal part is within tolerance but the trace is not
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+
+    do i = 1, m
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
+      if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
+        s_tmp = 1.d0 / dsqrt(S(i,i))
+        do j = 1, n
+          Vl(j,i) = Vl(j,i) * s_tmp 
+          Vr(j,i) = Vr(j,i) * s_tmp 
+        enddo
+      endif
+
+    enddo
+
+  endif
+
+  ! ---
+  ! overlap and error measures recomputed with the corrected vectors
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix after:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
+
+  deallocate(S)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then
+    print *, accu_nd, thr_nd 
+    print *, dabs(accu_d-dble(m))/dble(m), thr_d
+    print *, ' biorthog_binormalize failed !'
+    stop
+  endif
+
+end subroutine check_biorthog_binormalize
+
+! ---
+
+subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot)
+
+  ! Measure how far the weighted overlap S = Vl^T x W x Vr is from the identity.
+  !   accu_d  (out) : sum_i |S(i,i)|
+  !   accu_nd (out) : sqrt( sum_{i/=j} S(j,i)^2 )   (not divided by m)
+  !   S       (out) : the m x m weighted overlap matrix
+  ! When stop_ifnot is .true. the program stops if accu_nd > thr_nd
+  ! or if |accu_d - m| / m > thr_d.
+
+  implicit none
+
+  integer,          intent(in)  :: n, m
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: thr_d, thr_nd
+  double precision, intent(in)  :: W(n,n), Vl(n,m), Vr(n,m)
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: irow, icol
+  double precision              :: err_d
+  double precision, allocatable :: VlW(:,:)
+
+  print *, ' check weighted bi-orthogonality'
+
+  ! VlW = Vl^T x W, then S = VlW x Vr
+  allocate(VlW(m,n))
+  call dgemm('T', 'N', m, n, n, 1.d0, Vl, n, W, n, 0.d0, VlW, m)
+  call dgemm('N', 'N', m, m, n, 1.d0, VlW, m, Vr, n, 0.d0, S, m)
+  deallocate(VlW)
+
+  ! diagonal contribution
+  accu_d = 0.d0
+  do icol = 1, m
+    accu_d = accu_d + dabs(S(icol,icol))
+  enddo
+
+  ! off-diagonal contribution, accumulated column by column
+  accu_nd = 0.d0
+  do icol = 1, m
+    do irow = 1, m
+      if(irow /= icol) then
+        accu_nd = accu_nd + S(irow,icol) * S(irow,icol)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  ! relative deviation of the diagonal sum from m
+  err_d = dabs(accu_d-dble(m))/dble(m)
+  print *, ' accu_nd = ', accu_nd
+  print *, ' accu_d  = ', err_d
+
+  if(stop_ifnot) then
+    if((accu_nd .gt. thr_nd) .or. (err_d .gt. thr_d)) then
+      print *, ' non bi-orthogonal vectors !'
+      print *, ' accu_nd = ', accu_nd
+      print *, ' accu_d  = ', err_d
+      stop
+    endif
+  endif
+
+end subroutine check_weighted_biorthog
+
+! ---
+
+subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot)
+
+  ! Measure how far the overlap S = Vl^T x Vr is from the identity.
+  !   n, m    (in)  : Vl and Vr each hold m vectors of length n
+  !   Vl, Vr  (in)  : left / right vectors, stored as columns
+  !   accu_d  (out) : sum_i |S(i,i)|   (the sign of S(i,i) is ignored)
+  !   accu_nd (out) : sqrt( sum_{i/=j} S(j,i)^2 ) / m
+  !   S       (out) : the m x m overlap matrix
+  ! thr_d and thr_nd are only used when stop_ifnot is .true.: the program then
+  ! stops if accu_nd > thr_nd or if |accu_d - m| / m > thr_d.
+
+  implicit none
+
+  integer,          intent(in)  :: n, m
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: thr_d, thr_nd
+  double precision, intent(in)  :: Vl(n,m), Vr(n,m)
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: irow, icol
+  double precision              :: err_d
+
+  ! S = Vl^T x Vr
+  call dgemm('T', 'N', m, m, n, 1.d0, Vl, n, Vr, n, 0.d0, S, m)
+
+  ! single sweep over S, column by column
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do icol = 1, m
+    do irow = 1, m
+      if(irow /= icol) then
+        accu_nd = accu_nd + S(irow,icol) * S(irow,icol)
+      else
+        accu_d = accu_d + dabs(S(icol,icol))
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+
+  ! nothing is printed unless the check is enforced and fails
+  if(.not. stop_ifnot) return
+
+  ! relative deviation of the diagonal sum from m
+  err_d = dabs(accu_d-dble(m))/dble(m)
+  if((accu_nd .gt. thr_nd) .or. (err_d .gt. thr_d)) then
+    print *, ' non bi-orthogonal vectors !'
+    print *, ' accu_nd = ', accu_nd
+    print *, ' accu_d  = ', err_d
+    !print *, ' overlap matrix:'
+    !do irow = 1, m
+    !  write(*,'(1000(F16.10,X))') S(irow,:)
+    !enddo
+    stop
+  endif
+
+end subroutine check_biorthog
+
+! ---
+
+subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
+
+  ! Gram matrix S = V^T x V of the m columns of V(n,m), with two summaries:
+  !   accu_d  (out) : sum_i |S(i,i)|
+  !   accu_nd (out) : sqrt( sum_{i/=j} S(j,i)^2 )   (not divided by m)
+  ! Nothing is printed and no threshold is tested here.
+
+  implicit none
+
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: V(n,m)
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: irow, icol
+
+  S(:,:) = 0.d0
+  call dgemm('T', 'N', m, m, n, 1.d0, V, n, V, n, 0.d0, S, m)
+
+  ! diagonal contribution
+  accu_d = 0.d0
+  do icol = 1, m
+    accu_d = accu_d + dabs(S(icol,icol))
+  enddo
+
+  ! off-diagonal contribution, accumulated column by column
+  accu_nd = 0.d0
+  do icol = 1, m
+    do irow = 1, m
+      if(irow /= icol) then
+        accu_nd = accu_nd + S(irow,icol) * S(irow,icol)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  !print*, '    diag acc: ', accu_d
+  !print*, ' nondiag acc: ', accu_nd
+
+end subroutine check_orthog
+
+! ---
+
+subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
+  ! For each group of degenerate eigenvalues in e0, rework the matching columns of L0 and R0 in place.
+  implicit none
+  ! Columns i..i+m-1 form one group: degenerate eigenvalues are assumed to be stored contiguously.
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:), S_inv_half(:,:)
+
+  ! deg_num(i): size of the degenerate group starting at i, or 0 when i belongs to an earlier group
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+  ! thr_degen_tc is not declared in this routine (presumably an IRPF90 provider -- TODO confirm)
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
+
+  ! for each group: copy its columns into L, R, transform them, and copy them back
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! impose_orthog_svd is applied to the left and right blocks independently
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+      !call impose_orthog_GramSchmidt(n, m, L)
+      !call impose_orthog_GramSchmidt(n, m, R)
+
+      ! disabled alternative based on S^{-1/2} with S = L^T x R:
+
+      !allocate(S(m,m))
+      !call dgemm( 'T', 'N', m, m, n, 1.d0      &
+      !          , L, size(L, 1), R, size(R, 1) &
+      !          , 0.d0, S, size(S, 1) )
+      !allocate(S_inv_half(m,m))
+      !call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      !if(complex_root) then
+      !  print*, ' complex roots in inv_half !!! '
+      !  stop
+      !endif
+      !call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      !deallocate(S, S_inv_half)
+
+      call impose_biorthog_svd(n, m, L, R)
+
+      !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+
+      ! write the transformed vectors back into L0 / R0
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, L0, R0)
+  ! For each group of degenerate eigenvalues in e0, rework the matching columns of L0 and R0 in place (QR variant).
+  implicit none
+  ! thr_d / thr_nd are forwarded to impose_biorthog_qr and check_biorthog; a failed check stops the program.
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:)
+
+  ! deg_num(i): size of the degenerate group starting at i, or 0 when i belongs to an earlier group
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+  ! thr_degen_tc is not declared in this routine (presumably an IRPF90 provider -- TODO confirm)
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! for each group: columns i..i+m-1 are copied into L, R, transformed, checked and copied back
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! impose_orthog_svd is applied to the left and right blocks independently
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! impose_biorthog_qr on the pair, then check_biorthog with stop_ifnot = .true.
+  
+      call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+
+      allocate(S(m,m))
+      call check_biorthog(n, m, L, R, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+      !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
+      !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, thr_d, thr_nd, .false.)
+      deallocate(S)
+
+      ! write the transformed vectors back into L0 / R0
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_orthog_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0, R0)
+  ! For each degenerate group of e0, rotate the columns of L0 / R0 using overlaps with C0 (weight W0), then bi-orthogonalize.
+  implicit none
+  ! NOTE(review): thr_d and thr_nd only appear in a commented-out call below.
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(in)    :: e0(n), W0(n,n), C0(n,n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), C(:,:)
+  double precision, allocatable   :: S(:,:), S_inv_half(:,:), tmp(:,:)
+
+  ! deg_num(i): size of the degenerate group starting at i, or 0 when i belongs to an earlier group
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+  ! thr_degen_tc is not declared in this routine (presumably an IRPF90 provider -- TODO confirm)
+  de_thr = thr_degen_tc
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
+
+  ! for each group: columns i..i+m-1 of L0, R0, C0 are copied into L, R, C
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+      allocate(C(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! impose_orthog_svd is applied to the left and right blocks independently
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+
+
+      ! TODO:
+      ! select C correctly via overlap
+      ! or via selecting degen in HF
+
+      !call max_overlap_qr(n, m, C, L)
+      !call max_overlap_qr(n, m, C, R)
+
+      ! S = L^T x W0 x C, then L is updated by max_overlap_qr using this S
+      allocate(tmp(m,n))
+      allocate(S(m,m))
+
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , L, size(L, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), C, size(C, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, L)
+      !call max_overlap_invprod(n, m, S, L)
+      ! S = C^T x W0 x R, then R is updated by max_overlap_qr using this S
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , C, size(C, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, R)
+      !call max_overlap_invprod(n, m, S, R)
+
+      deallocate(S, tmp)
+
+      ! S = L^T x R; use S_inv_half unless complex_root is set, in which case fall back to impose_biorthog_svd
+  
+      allocate(S(m,m), S_inv_half(m,m))
+      call dgemm( 'T', 'N', m, m, n, 1.d0      &
+                , L, size(L, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+      call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      if(complex_root)then
+        call impose_biorthog_svd(n, m, L, R)
+        !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R)
+      else
+        call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      endif
+      deallocate(S, S_inv_half)
+
+      ! write the transformed vectors back into L0 / R0 (C0 is left untouched)
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R, C)
+
+    endif
+  enddo
+
+end subroutine impose_unique_biorthog_degen_eigvec 
+
+! ---
+
+subroutine max_overlap_qr(m, n, S0, V)
+  ! QR-factorize a copy of S0(n,n) and replace V(m,n) by V x Q. Here m is the vector length, n the number of vectors.
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: S0(n,n)
+  double precision, intent(inout) :: V(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  ! work on a copy: dgeqrf / dorgqr overwrite their matrix argument, and S0 is intent(in)
+  allocate(S(n,n))
+  S = S0
+
+  ! each LAPACK call is made twice: first with LWORK = -1 (workspace query), then with WORK(LWORK)
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! get Q in S matrix
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  ! ---
+
+  ! V0.T <-- Q.T x V0.T, where Q = S
+
+  allocate( tmp(n,m) )
+
+  call dgemm( 'T', 'T', n, m, n, 1.d0      &
+            , S, size(S, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(S)
+  ! transpose tmp(n,m) back into V(m,n), i.e. V <-- V x Q
+  do i = 1, n
+    do j = 1, m
+      V(j,i) = tmp(i,j)
+    enddo
+  enddo
+
+  deallocate(tmp)
+
+  ! ---
+
+  return
+end subroutine max_overlap_qr
+
+! ---
+
+subroutine max_overlap_invprod(n, m, S, V)
+
+  ! Replace V(n,m) by V x S_inv, where S_inv(m,m) is filled by get_inverse from S
+  ! (presumably the matrix inverse of S). S and S_inv are printed row by row.
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: S(m,m)
+  double precision, intent(inout) :: V(n,m)
+
+  integer                         :: irow
+  double precision, allocatable   :: S_inv(:,:), V_old(:,:)
+
+  allocate(S_inv(m,m), V_old(n,m))
+  call get_inverse(S, m, m, S_inv, m)
+
+  print *, ' overlap '
+  do irow = 1, m
+    write(*, '(1000(F16.10,X))') S(irow,:)
+  enddo
+  print *, ' inv overlap '
+  do irow = 1, m
+    write(*, '(1000(F16.10,X))') S_inv(irow,:)
+  enddo
+
+  ! V is also the output of dgemm, so the product is formed from a copy of it
+  V_old(:,:) = V(:,:)
+  call dgemm('N', 'N', n, m, m, 1.d0, V_old, n, S_inv, m, 0.d0, V, n)
+
+  deallocate(V_old, S_inv)
+
+end subroutine max_overlap_invprod
+
+! ---
+
+subroutine impose_biorthog_svd(n, m, L, R)
+  ! Bi-orthonormalize in place the m left vectors L(n,m) and right vectors R(n,m) from an SVD of S = L^T x R.
+  implicit none
+  ! The program stops if a singular value is <= 1.d-6 (reported as a linear dependency).
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: L(n,m), R(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), V(:,:), Vt(:,:), D(:)
+
+  ! S = L^T x R
+
+  allocate(S(m,m))
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! svd is defined outside this chunk; presumably S = U x diag(D) x Vt -- TODO confirm
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+  ! D(i) is overwritten with D(i)^(-1/2); values below threshold are counted and abort the run
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  allocate(V(m,m))
+  do i = 1, m
+    do j = 1, m
+      V(j,i) = Vt(i,j)
+    enddo
+  enddo
+  deallocate(Vt)
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- R x V
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , R, size(R, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(V)
+  ! R <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      R(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  ! tmp <-- L x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , L, size(L, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(U)
+  ! L <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      L(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! S = L^T x R is recomputed with the new vectors; it only feeds the commented-out print below
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_biorthog_svd
+
+! ---
+
+subroutine impose_weighted_biorthog_qr(m, n, thr_d, thr_nd, Vl, W, Vr)
+  ! Make Vl^T x W x Vr the identity via a QR factorization. Here m is the vector length, n the number of vectors.
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(m,n), W(m,m), Vr(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision                :: accu_nd, accu_d
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), R(:,:), tmp(:,:), Stmp(:,:)
+  ! first pass: sign fix / rescaling of Vl, Vr (stop_ifnot = .false., so it never stops here)
+
+  call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.)
+  
+  ! S = (Vl^T x W) x Vr
+  
+  allocate(Stmp(n,m), S(n,n))
+  call dgemm( 'T', 'N', n, m, m, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', n, n, m, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  ! early exit when S is already the identity within thr_d / thr_nd (accu_nd is not divided by n here)
+  if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then
+    print *, ' bi-orthogonal vectors without QR !'
+    deallocate(S)
+    return
+  endif
+
+  ! -------------------------------------------------------------------------------------
+  !                           QR factorization of S: S = Q x R
+
+
+  print *, ' apply QR decomposition ...'
+
+  allocate( TAU(n), WORK(1) )
+  ! each LAPACK call is made twice: first with LWORK = -1 (workspace query), then with WORK(LWORK)
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! save the upper triangular R
+  allocate( R(n,n) )
+  R(:,:) = S(:,:)
+
+  ! get Q
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orthog left & right vectors:
+  !                                           Vr' = Vr x inv(R) 
+  !                                           Vl'.T = Q.T x Vl.T, i.e. Vl' = Vl x Q
+
+  ! Q.T x Vl.T, where Q = S
+
+  allocate( tmp(n,m) )
+  call dgemm( 'T', 'T', n, m, n, 1.d0        &
+            , S, size(S, 1), Vl, size(Vl, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  do i = 1, n
+    do j = 1, m
+      Vl(j,i) = tmp(i,j)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  ! ---
+
+  ! inv(R) 
+  !print *, ' inversing upper triangular matrix ...'
+  call dtrtri("U", "N", n, R, n, INFO)
+  if(INFO .ne. 0) then
+    print*,'dtrtri failed !!', INFO
+    stop
+  endif
+  !print *, ' inversing upper triangular matrix OK' 
+  ! R was copied from the dgeqrf output, so its strictly lower triangle is not part of R: zero it
+  do i = 1, n-1
+    do j = i+1, n
+      R(j,i) = 0.d0
+    enddo
+  enddo
+
+  !print *, ' inv(R):'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') R(i,:)
+  !enddo
+
+  ! Vr x inv(R) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), R, size(R, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate( R )
+
+  do i = 1, n
+    do j = 1, m
+      Vr(j,i) = tmp(j,i)
+    enddo
+  enddo
+  deallocate(tmp)
+  ! final pass on the new vectors, again with stop_ifnot = .false.
+  call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.)
+
+  return
+end subroutine impose_weighted_biorthog_qr
+
+! ---
+
+subroutine check_weighted_biorthog_binormalize(n, m, Vl, W, Vr, thr_d, thr_nd, stop_ifnot)
+
+  ! Sign-fix and rescale, in place, the columns of Vl and Vr so that S = Vl^T x W x Vr
+  ! has a unit diagonal, then recompute S and print its quality measures (W is only read).
+  !   thr_d      : tolerance on |sum_i S(i,i) - m| / m
+  !   thr_nd     : tolerance on sqrt( sum_{i/=j} S(j,i)^2 ) / m
+  !   stop_ifnot : if .true., stop when one of the two tolerances is exceeded
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  logical,          intent(in)    :: stop_ifnot
+  double precision, intent(in)    :: thr_d, thr_nd
+  double precision, intent(inout) :: Vl(n,m), W(n,n), Vr(n,m)
+
+  integer                         :: i, j
+  double precision                :: accu_d, accu_nd, s_tmp
+  double precision, allocatable   :: S(:,:), Stmp(:,:)
+
+  print *, ' check weighted bi-orthonormality'
+  ! S = (Vl^T x W) x Vr
+  allocate(Stmp(m,n), S(m,m))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  ! every negative diagonal overlap is made positive by flipping the left vector,
+  ! as in check_biorthog_binormalize, so that dsqrt(S(i,i)) below is well defined
+  do i = 1, m
+    if(S(i,i) .lt. 0.d0) then
+      do j = 1, n
+        Vl(j,i) = -1.d0 * Vl(j,i)
+      enddo
+      S(i,i) = -S(i,i)
+    endif
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  print*, '    diag acc: ', accu_d
+  print*, ' nondiag acc: ', accu_nd
+
+  ! rescale only when the vectors are bi-orthogonal but not yet bi-normalized
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+    do i = 1, m
+      print *, i, S(i,i)
+      ! a vanishing overlap cannot be normalized: 1/dsqrt(0) would be infinite
+      if(S(i,i) <= 0.d0) then
+        print *, ' overlap negative or zero'
+        exit
+      endif
+      if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
+        s_tmp = 1.d0 / dsqrt(S(i,i))
+        do j = 1, n
+          Vl(j,i) = Vl(j,i) * s_tmp 
+          Vr(j,i) = Vr(j,i) * s_tmp 
+        enddo
+      endif
+    enddo
+  endif
+
+  ! recompute S with the updated vectors
+  allocate(Stmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0              &
+            , Stmp, size(Stmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+  !print *, ' overlap matrix after:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd) / dble(m)
+  print *, '    diag acc: ', accu_d
+  print *, ' nondiag acc: ', accu_nd
+
+  deallocate(S)
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then
+    print *, accu_nd, thr_nd 
+    print *, dabs(accu_d-dble(m))/dble(m), thr_d
+    print *, ' weighted biorthog_binormalize failed !'
+    stop
+  endif
+
+end subroutine check_weighted_biorthog_binormalize
+
+! ---
+
+subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
+  ! Bi-orthonormalize in place L(n,m) and R(n,m) w.r.t. the weight "overlap", from an SVD of S = L^T x overlap x R.
+  implicit none
+  ! The program stops if a singular value is <= 1.d-6 (reported as a linear dependency).
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: overlap(n,n)
+  double precision, intent(inout) :: L(n,m), R(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:),Stmp(:,:)
+  double precision, allocatable   :: U(:,:), V(:,:), Vt(:,:), D(:)
+
+  ! ---
+
+  allocate(S(m,m),Stmp(n,m))
+
+  ! S = L.T x overlap x R   (Stmp = overlap x R is formed first)
+  call dgemm( 'N', 'N', n, m, n, 1.d0      &
+            , overlap, size(overlap, 1), R, size(R, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
+
+  ! svd is defined outside this chunk; presumably S = U x diag(D) x Vt -- TODO confirm
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+  ! D(i) is overwritten with D(i)^(-1/2); values below threshold are counted and abort the run
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  allocate(V(m,m))
+  do i = 1, m
+    do j = 1, m
+      V(j,i) = Vt(i,j)
+    enddo
+  enddo
+  deallocate(Vt)
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- R x V
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , R, size(R, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(V)
+  ! R <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      R(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  ! tmp <-- L x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , L, size(L, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(U)
+  ! L <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      L(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! the weighted overlap is recomputed with the new vectors; it only feeds the commented-out print below
+
+  allocate(S(m,m),Stmp(n,m))
+  ! S = L.T x overlap x R
+  call dgemm( 'N', 'N', n, m, n, 1.d0      &
+            , overlap, size(overlap, 1), R, size(R, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), Stmp, size(Stmp, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(Stmp)
+
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  deallocate(S)
+
+  return
+end subroutine impose_weighted_biorthog_svd
+
+! ---
+
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
new file mode 100644
index 00000000..8db044d3
--- /dev/null
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -0,0 +1,670 @@
+subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! Returns the sorted real eigenvalues and the corresponding LEFT/RIGHT eigenvectors
+  ! of a non hermitian matrix A(n,n). Degeneracies are split (split_matrix_degen) with a
+  ! growing shift until all eigenvalues are real; the program stops if the shift exceeds 1.d-3.
+  !
+  ! n_real_eigv is the number of real eigenvalues: it is always equal to "n" on return
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, k, iteration
+  double precision              :: shift_current
+  double precision              :: accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:)
+  double precision :: accu,thr_cut, thr_norm=1d0
+
+  ! thr_cut is the threshold handed to cancel_small_elmts before each diagonalization
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+
+  ! first diagonalization, without any splitting shift
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+  ! re-diagonalize with a shift multiplied by at least 10 each time, until all eigenvalues are real
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' orthog between degen eigenvect' 
+    print *, ' '
+    ! First attempt: impose_orthog_degen_eigvec is applied separately to the
+    ! right and to the left eigenvectors (no scratch S^{-1/2} is needed here).
+    ! If the vectors are still not bi-orthogonal, impose_biorthog_qr is used as
+    ! a fallback, and a failure of that fallback stops the program.
+    call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
+    call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ',accu_nd
+      print*,'Must be a deep problem ...'
+      stop
+     endif
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  ! undo the row permutation introduced by the sorting of the diagonal elements
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+  !! RECOMPUTING THE EIGENVALUES as <VL_i|A|VR_i> / <VL_i|VR_i> with the original matrix A
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+
+subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! Returns the sorted real eigenvalues and the corresponding LEFT/RIGHT eigenvectors
+  ! of a non hermitian matrix A(n,n). Bi-orthonormality is imposed with S^{-1/2} when it
+  ! exists (S being the matrix returned by check_biorthog), with a QR-based fallback otherwise.
+  !
+  ! n_real_eigv is the number of real eigenvalues: it is always equal to "n" on return
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, k, iteration
+  double precision              :: shift_current
+  double precision              :: accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:)
+  double precision :: accu,thr_cut, thr_norm=1.d0
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0
+  print*,'***** iteration = ',iteration
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    ! complex_root and S_nh_inv_half have to be defined from S before they are tested / used below
+    call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root)
+    if(complex_root) then 
+     print *, ' '
+     print *, ' '
+     print *, ' orthog between degen eigenvect' 
+     print *, ' '
+     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
+     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
+     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S,  thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) 
+
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ', accu_nd
+      call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root)
+      if(complex_root)then 
+       call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
+      else
+       print*,'S^{-1/2} exists !!'
+       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+      endif
+     endif
+    else ! the matrix S^{-1/2} exists
+     print*,'S^{-1/2} exists !!'
+     call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_s_inv_half'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! Returns the sorted real eigenvalues and the bi-orthonormalized left/right eigenvectors
+  ! of the TC fock matrix A(n,n); degenerate eigenvectors are first treated with
+  ! impose_unique_biorthog_degen_eigvec, then with S^{-1/2} or QR if that is not sufficient.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, k, iteration
+  double precision              :: shift_current
+  double precision              :: accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:)
+  double precision :: accu,thr_cut
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+  double precision :: thr_norm=1d0
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' Using impose_unique_biorthog_degen_eigvec' 
+    print *, ' '
+    ! NOTE(review): mo_coef is not an argument (presumably the IRP provider), so n has to match its dimensions - confirm
+    call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    print*,'accu_nd = ',accu_nd
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root)
+     if(complex_root)then 
+      print*,'S^{-1/2} does not exits, using QR bi-orthogonalization'
+      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
+     else
+      print*,'S^{-1/2} exists !!'
+      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+     endif
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_fock_mat'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
diff --git a/src/non_hermit_dav/project.irp.f b/src/non_hermit_dav/project.irp.f
new file mode 100644
index 00000000..c04719ac
--- /dev/null
+++ b/src/non_hermit_dav/project.irp.f
@@ -0,0 +1,53 @@
+subroutine h_non_hermite(v,u,Hmat,a,N_st,sze)
+ implicit none
+ BEGIN_DOC
+ ! Accumulates the action of a dense matrix on a set of vectors:
+ !
+ ! $v \leftarrow v + a \, H \, u$, where H is the (sze,sze) array Hmat
+ !
+ ! and u, v each hold N_st column vectors of length sze.
+ !
+ ! v is NOT zeroed here: the caller decides its initial content.
+ !
+ END_DOC
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat(sze,sze), a
+ double precision, intent(inout)  :: v(sze,N_st)
+ integer                          :: istate, icol
+ ! column-oriented update: column icol of Hmat, scaled by a * u(icol,istate),
+ ! is added to v(:,istate); the columns are visited in increasing order
+ do istate = 1, N_st
+  do icol = 1, sze
+   v(1:sze,istate) = v(1:sze,istate) + (a * u(icol,istate) * Hmat(1:sze,icol))
+  enddo
+ enddo
+end
+
+
+subroutine exp_tau_H(u,v,hmat,tau,et,N_st,sze)
+ implicit none
+ BEGIN_DOC
+ ! Applies v = (1 - tau (H - et)) u, the matrix H being given by hmat(sze,sze)
+ END_DOC
+ integer, intent(in) :: N_st,sze
+ double precision, intent(in) :: hmat(sze,sze), u(sze,N_st), tau, et
+ double precision, intent(out):: v(sze,N_st)
+ double precision :: minus_tau
+ ! v starts as (1 + tau * et) u, then h_non_hermite adds (- tau) * H u to it
+ minus_tau = -1.d0 * tau
+ v = (1.d0 + tau * et) * u
+ call h_non_hermite(v,u,hmat,minus_tau,N_st,sze)
+end
+
+double precision function project_phi0(u,Hmat0,N_st,sze)
+ implicit none
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat0(sze)
+ integer                          :: irow
+ project_phi0 = 0.d0   ! accumulates sum_j u(j,1) * Hmat0(j): only the first column of u is read
+ do irow = 1, sze
+  project_phi0 = project_phi0 + (u(irow,1) * Hmat0(irow))
+ enddo
+ project_phi0 = project_phi0 * (1.d0 / u(1,1))   ! scaled by 1/u(1,1); u(1,1) is not checked against zero
+end
+
diff --git a/src/non_hermit_dav/utils.irp.f b/src/non_hermit_dav/utils.irp.f
new file mode 100644
index 00000000..7f331a6b
--- /dev/null
+++ b/src/non_hermit_dav/utils.irp.f
@@ -0,0 +1,325 @@
+
+subroutine get_inv_half_svd(matrix, n, matrix_inv_half)
+
+  BEGIN_DOC
+  !   :math:`X = S^{-1/2}` obtained by SVD
+  !
+  !   Diagnostic routine: it prints consistency checks and then always stops the program.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: num_linear_dependencies
+  integer                       :: LDA, LDC
+  integer                       :: i, j, k
+  double precision, parameter   :: threshold = 1.d-6
+  double precision, allocatable :: U(:,:),Vt(:,:), D(:),matrix_half(:,:),D_half(:)
+  double precision :: accu_d,accu_nd
+
+  LDA = size(matrix, 1)
+  LDC = size(matrix_inv_half, 1)
+  if(LDA .ne. LDC) then
+    print*, ' LDA != LDC'
+    stop
+  endif
+
+  print*, ' n   = ', n
+  print*, ' LDA = ', LDA
+  print*, ' LDC = ', LDC
+
+  double precision,allocatable :: WR(:),WI(:),VL(:,:),VR(:,:)
+  allocate(WR(n),WI(n),VL(n,n),VR(n,n))
+  call lapack_diag_non_sym(n,matrix,WR,WI,VL,VR)
+  do i = 1, n
+    print*,'WR,WI',WR(i),WI(i)
+  enddo
+
+  allocate(U(LDC,n), Vt(LDA,n), D(n))
+
+  call svd(matrix, LDA, U, LDC, D, Vt, LDA, n, n)
+  double precision, allocatable :: tmp1(:,:),tmp2(:,:),D_mat(:,:)
+  allocate(tmp1(n,n),tmp2(n,n),D_mat(n,n),matrix_half(n,n),D_half(n))
+  D_mat = 0.d0
+  do i = 1,n
+   D_mat(i,i) = D(i)
+  enddo
+  ! matrix = U D Vt
+  ! tmp1 = U D
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , U, size(U, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  ! tmp2 = tmp1 X Vt = matrix
+  tmp2 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , tmp1, size(tmp1, 1), Vt, size(Vt, 1) &
+            , 0.d0, tmp2, size(tmp2, 1) )
+  print*,'Checking the recomposition of the matrix'
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp2(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp2(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+  print*,'passed the recomposition'
+
+  num_linear_dependencies = 0
+  do i = 1, n
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D_half(i) = dsqrt(D(i))
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  matrix_inv_half = 0.d0
+  matrix_half = 0.d0
+  do k = 1, n
+    if(D(k) /= 0.d0) then
+      do j = 1, n
+        do i = 1, n
+!          matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(k,j)
+           matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(j,k)
+           matrix_half(i,j) = matrix_half(i,j) + U(i,k) * D_half(k) * Vt(j,k)
+        enddo
+      enddo
+    endif
+  enddo
+  print*,'testing S^1/2 * S^1/2= S'
+  ! tmp1 = S^1/2 X S^1/2
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , matrix_half, size(matrix_half, 1), matrix_half, size(matrix_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp1(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+!  print*,'S inv half'
+!  do i = 1, n
+!   write(*, '(1000(F16.10,X))') matrix_inv_half(i,:)
+!  enddo
+
+  double precision, allocatable :: pseudo_inverse(:,:),identity(:,:)
+  allocate( pseudo_inverse(n,n),identity(n,n))
+  call get_pseudo_inverse(matrix,n,n,n,pseudo_inverse,n,threshold)
+ 
+  ! S^-1 X S = 1
+!  identity = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+!            , matrix, size(matrix, 1), pseudo_inverse, size(pseudo_inverse, 1) &
+!            , 0.d0, identity, size(identity, 1) )
+  print*,'Checking  S^-1/2 X S^-1/2 = S^-1 ?'
+  ! S^-1/2 X S^-1/2 = S^-1 ?
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            ,matrix_inv_half, size(matrix_inv_half, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp1(i,i) - pseudo_inverse(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - pseudo_inverse(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+  stop
+! NOTE(review): the stop above is unconditional, so this routine never returns to its caller
+!  ! ( S^-1/2 x S ) x S^-1/2
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , Stmp, size(Stmp, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+
+  ! S^-1/2 x ( S^-1/2 x S )
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , matrix_inv_half, size(matrix_inv_half, 1), Stmp, size(Stmp, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+ 
+!  do i = 1, n
+!    do j = 1, n
+!      if(i==j) then
+!       accu_d += Stmp2(j,i)
+!      else 
+!       accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i)
+!      endif
+!    enddo
+!  enddo
+!  accu_nd = dsqrt(accu_nd)
+!  print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd
+!  print*, ' after S^-1/2: sum of     diag S elements = ', accu_d
+!  do i = 1, n
+!   write(*,'(1000(F16.10,X))') Stmp2(i,:)
+!  enddo
+
+  !double precision  :: thresh
+  !thresh = 1.d-10
+  !if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then
+  !  stop
+  !endif
+
+end subroutine get_inv_half_svd
+
+! ---
+
+subroutine get_inv_half_nonsymmat_diago(matrix, n, matrix_inv_half, complex_root)
+
+  BEGIN_DOC
+  ! input:  S = matrix
+  ! output: S^{-1/2} = matrix_inv_half obtained by diagonalization
+  !
+  ! S = VR D VL^T
+  !   = VR D^{1/2} D^{1/2} VL^T
+  !   = VR D^{1/2} VL^T VR D^{1/2} VL^T
+  !   = S^{1/2} S^{1/2} with S^{1/2} = VR D^{1/2} VL^T
+  !
+  ! == > S^{-1/2} = VR D^{-1/2} VL^T
+  ! complex_root = .True. (matrix_inv_half left to 0) when S^{-1/2} cannot be built this way
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  logical,          intent(out) :: complex_root
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: i, j
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), S(:,:), S_diag(:)
+  double precision, allocatable :: tmp1(:,:), D_mat(:,:)
+
+  complex_root = .False.
+
+  matrix_inv_half = 0.D0
+  print*,'Computing S^{-1/2}'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n))
+  call lapack_diag_non_sym(n, matrix, WR, WI, VL, VR)
+  ! full check_biorthog argument list (thresholds + stop flag), as in new_routines.irp.f
+  allocate(S(n,n))
+  call check_biorthog(n, n, VL, VR, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+  print*,'accu_nd S^{-1/2}',accu_nd
+  if(accu_nd.gt.1.d-10) then
+    complex_root = .True. ! if vectors are not bi-orthogonal return 
+    print*,'Eigenvectors of S are not bi-orthonormal, skipping S^{-1/2}'
+    return
+  endif
+
+  allocate(S_diag(n))
+  do i = 1, n
+    S_diag(i) = 1.d0/dsqrt(S(i,i))
+    ! a zero eigenvalue is rejected too: D^{-1/2} would contain 1/sqrt(0)
+    if(dabs(WI(i)).gt.1.d-20.or.WR(i).le.0.d0)then ! check that eigenvalues are real and positive 
+     complex_root = .True.
+     print*,'Eigenvalues of S have imaginary part '
+     print*,'WR(i),WI(i)',WR(i), WI(i)
+     print*,'Skipping S^{-1/2}'
+     return
+    endif
+  enddo
+  deallocate(S)
+  ! from here on, all the eigenvalues of S are real (|WI| <= 1.d-20) and strictly positive
+  ! normalization of vectors 
+  do i = 1, n
+    if(S_diag(i).eq.1.d0) cycle
+    do j = 1,n
+      VL(j,i) *= S_diag(i)
+      VR(j,i) *= S_diag(i)
+    enddo
+  enddo
+  deallocate(S_diag)
+
+  allocate(tmp1(n,n), D_mat(n,n))
+
+  D_mat = 0.d0
+  do i = 1, n
+    D_mat(i,i) = 1.d0/dsqrt(WR(i))
+  enddo
+  deallocate(WR, WI)
+
+  ! tmp1 = VR D^{-1/2} 
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                &
+            , VR, size(VR, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  deallocate(VR, D_mat)
+
+  ! S^{-1/2} = tmp1 X VL^T 
+  matrix_inv_half = 0.d0
+  call dgemm( 'N', 'T', n, n, n, 1.d0              &
+            , tmp1, size(tmp1, 1), VL, size(VL, 1) &
+            , 0.d0, matrix_inv_half, size(matrix_inv_half, 1) )
+  deallocate(tmp1, VL)
+
+end
+
+! ---
+
+subroutine bi_ortho_s_inv_half(n,leigvec,reigvec,S_nh_inv_half)
+ implicit  none
+ integer, intent(in) :: n
+ double precision, intent(in) :: S_nh_inv_half(n,n)
+ double precision, intent(inout) :: leigvec(n,n),reigvec(n,n)
+ BEGIN_DOC 
+ ! bi-orthonormalization of left and right vectors (both are overwritten)
+ ! 
+ ! S = VL^T VR 
+ !
+ ! S^{-1/2} S S^{-1/2} = 1 = S^{-1/2} VL^T VR S^{-1/2} = VL_new^T VR_new
+ !
+ ! VL_new = VL (S^{-1/2})^T
+ ! 
+ ! VR_new = VR S^{-1/2}
+ END_DOC
+ double precision,allocatable :: vl_tmp(:,:),vr_tmp(:,:)
+ double precision,allocatable :: S(:,:)
+ double precision :: accu_d, accu_nd
+ print*,'Bi-orthonormalization using S^{-1/2}'
+ allocate(vl_tmp(n,n),vr_tmp(n,n),S(n,n))
+ vl_tmp = leigvec
+ vr_tmp = reigvec
+ ! VL_new = VL (S^{-1/2})^T
+ call dgemm( 'N', 'T', n, n, n, 1.d0                                            &
+             , vl_tmp, size(vl_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, leigvec, size(leigvec, 1) )
+ ! VR_new = VR S^{-1/2}
+ call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+             , vr_tmp, size(vr_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, reigvec, size(reigvec, 1) )
+ ! full check_biorthog argument list, as in new_routines.irp.f; the local test below decides to stop
+ call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+ if(dabs(accu_d - n).gt.1.d-10 .or. accu_nd .gt.1.d-8 )then
+  print*,'Pb in bi_ortho_s_inv_half !!'
+  print*,'accu_d =',accu_d
+  print*,'accu_nd =',accu_nd
+  stop
+ endif
+end

From a4bb488d64ab90ac7fb2ba7666e9d5636b060076 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 6 Feb 2023 19:26:58 +0100
Subject: [PATCH 5/7] tc_scf compiles and gives good energy for Ne. Added a
 test in test_Ne.sh

---
 src/tc_scf/EZFIO.cfg                         |    4 +
 src/tc_scf/NEED                              |    6 +
 src/tc_scf/combine_lr_tcscf.irp.f            |   74 ++
 src/tc_scf/diago_bi_ort_tcfock.irp.f         |  229 ++++
 src/tc_scf/diis_tcscf.irp.f                  |  186 ++++
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f        |  405 +++++++
 src/tc_scf/fock_for_right.irp.f              |  107 ++
 src/tc_scf/fock_tc.irp.f                     |  307 ++++++
 src/tc_scf/fock_tc_mo_tot.irp.f              |  144 +++
 src/tc_scf/fock_three.irp.f                  |  229 ++++
 src/tc_scf/fock_three_bi_ortho.irp.f         |  178 ++++
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f |  286 +++++
 src/tc_scf/fock_three_utils.irp.f            |  140 +++
 src/tc_scf/integrals_in_r_stuff.irp.f        |  391 +++++++
 src/tc_scf/minimize_tc_angles.irp.f          |   12 +
 src/tc_scf/molden_lr_mos.irp.f               |  176 +++
 src/tc_scf/print_angle_tc_orb.irp.f          |    9 +
 src/tc_scf/print_fit_param.irp.f             |   60 ++
 src/tc_scf/rh_tcscf.irp.f                    |  336 ++++++
 src/tc_scf/rh_tcscf_diis.irp.f               |  362 +++++++
 src/tc_scf/rh_tcscf_simple.irp.f             |  129 +++
 src/tc_scf/rotate_tcscf_orbitals.irp.f       |  367 +++++++
 src/tc_scf/routines_rotates.irp.f            |  359 +++++++
 src/tc_scf/tc_petermann_factor.irp.f         |   78 ++
 src/tc_scf/tc_scf.irp.f                      |   75 ++
 src/tc_scf/tc_scf_dm.irp.f                   |   37 +
 src/tc_scf/tc_scf_energy.irp.f               |   34 +
 src/tc_scf/tc_scf_utils.irp.f                |   43 +
 src/tc_scf/test_Ne.sh                        |   13 +
 src/tc_scf/test_int.irp.f                    | 1003 ++++++++++++++++++
 src/tc_scf/three_e_energy_bi_ortho.irp.f     |  174 +++
 src/utils/block_diag_degen.irp.f             |  218 ++++
 src/utils/loc.f                              |  327 ++++++
 33 files changed, 6498 insertions(+)
 create mode 100644 src/tc_scf/EZFIO.cfg
 create mode 100644 src/tc_scf/NEED
 create mode 100644 src/tc_scf/combine_lr_tcscf.irp.f
 create mode 100644 src/tc_scf/diago_bi_ort_tcfock.irp.f
 create mode 100644 src/tc_scf/diis_tcscf.irp.f
 create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
 create mode 100644 src/tc_scf/fock_for_right.irp.f
 create mode 100644 src/tc_scf/fock_tc.irp.f
 create mode 100644 src/tc_scf/fock_tc_mo_tot.irp.f
 create mode 100644 src/tc_scf/fock_three.irp.f
 create mode 100644 src/tc_scf/fock_three_bi_ortho.irp.f
 create mode 100644 src/tc_scf/fock_three_bi_ortho_new_new.irp.f
 create mode 100644 src/tc_scf/fock_three_utils.irp.f
 create mode 100644 src/tc_scf/integrals_in_r_stuff.irp.f
 create mode 100644 src/tc_scf/minimize_tc_angles.irp.f
 create mode 100644 src/tc_scf/molden_lr_mos.irp.f
 create mode 100644 src/tc_scf/print_angle_tc_orb.irp.f
 create mode 100644 src/tc_scf/print_fit_param.irp.f
 create mode 100644 src/tc_scf/rh_tcscf.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_diis.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_simple.irp.f
 create mode 100644 src/tc_scf/rotate_tcscf_orbitals.irp.f
 create mode 100644 src/tc_scf/routines_rotates.irp.f
 create mode 100644 src/tc_scf/tc_petermann_factor.irp.f
 create mode 100644 src/tc_scf/tc_scf.irp.f
 create mode 100644 src/tc_scf/tc_scf_dm.irp.f
 create mode 100644 src/tc_scf/tc_scf_energy.irp.f
 create mode 100644 src/tc_scf/tc_scf_utils.irp.f
 create mode 100755 src/tc_scf/test_Ne.sh
 create mode 100644 src/tc_scf/test_int.irp.f
 create mode 100644 src/tc_scf/three_e_energy_bi_ortho.irp.f
 create mode 100644 src/utils/block_diag_degen.irp.f
 create mode 100644 src/utils/loc.f

diff --git a/src/tc_scf/EZFIO.cfg b/src/tc_scf/EZFIO.cfg
new file mode 100644
index 00000000..313d6f2b
--- /dev/null
+++ b/src/tc_scf/EZFIO.cfg
@@ -0,0 +1,4 @@
+[bitc_energy]
+type: Threshold
+doc: Energy bi-tc HF
+interface: ezfio
diff --git a/src/tc_scf/NEED b/src/tc_scf/NEED
new file mode 100644
index 00000000..4e340cfe
--- /dev/null
+++ b/src/tc_scf/NEED
@@ -0,0 +1,6 @@
+hartree_fock
+bi_ortho_mos
+three_body_ints
+bi_ort_ints
+tc_keywords
+non_hermit_dav
diff --git a/src/tc_scf/combine_lr_tcscf.irp.f b/src/tc_scf/combine_lr_tcscf.irp.f
new file mode 100644
index 00000000..b257f4a5
--- /dev/null
+++ b/src/tc_scf/combine_lr_tcscf.irp.f
@@ -0,0 +1,74 @@
+
+! ---
+
+program combine_lr_tcscf
+
+  BEGIN_DOC
+  ! Sets the grid entities and the bi_ortho flag below, then calls comb_orbitals.
+  END_DOC
+
+  implicit none
+  ! the entities assigned by hand below are followed by a touch statement
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  bi_ortho = .True.
+  touch bi_ortho
+
+  call comb_orbitals()
+
+end
+
+! ---
+
+subroutine comb_orbitals()
+
+  BEGIN_DOC
+  ! Builds a new set of right coefficients: columns 1..elec_alpha_num come from
+  ! mo_r_coef, the remaining columns come from mo_l_coef. Each set is passed
+  ! through impose_weighted_orthog_svd together with ao_overlap. The result
+  ! replaces mo_r_coef and is written with ezfio_set_bi_ortho_mos_mo_r_coef.
+  END_DOC
+
+  implicit none
+  integer                       :: n, m, nn, mm
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: R(:,:), L(:,:), Rnew(:,:), tmp(:,:), S(:,:)
+
+  n  = ao_num
+  m  = mo_num
+  nn = elec_alpha_num
+  mm = m - nn
+
+  allocate(L(n,m), R(n,m), Rnew(n,m), S(m,m))
+  L = mo_l_coef
+  R = mo_r_coef
+
+  call check_weighted_biorthog(n, m, ao_overlap, L, R, accu_d, accu_nd, S, .true.)
+
+  ! first nn columns: taken from the right coefficients
+  allocate(tmp(n,nn))
+  tmp(1:n,1:nn) = R(1:n,1:nn)
+  call impose_weighted_orthog_svd(n, nn, ao_overlap, tmp)
+  Rnew(1:n,1:nn) = tmp(1:n,1:nn)
+  deallocate(tmp)
+
+  ! last mm columns: taken from the left coefficients
+  allocate(tmp(n,mm))
+  tmp(1:n,1:mm) = L(1:n,nn+1:nn+mm)
+  call impose_weighted_orthog_svd(n, mm, ao_overlap, tmp)
+  Rnew(1:n,nn+1:nn+mm) = tmp(1:n,1:mm)
+  deallocate(tmp)
+
+  call check_weighted_biorthog(n, m, ao_overlap, Rnew, Rnew, accu_d, accu_nd, S, .true.)
+
+  mo_r_coef = Rnew
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+  deallocate(L, R, Rnew, S)
+end subroutine comb_orbitals
+
+! ---
+
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
new file mode 100644
index 00000000..726169d9
--- /dev/null
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -0,0 +1,229 @@
+! ---
+
+ BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_mo, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! Left/right eigenvectors and eigenvalues returned by non_hrmt_bieig for the level-shifted
+  ! Fock_matrix_tc_mo_tot (MO basis), and the overlap L^T R of these eigenvectors.
+  ! The eigenvalues still include the level shift. Stops if the bi-orthogonality check fails.
+  END_DOC
+  implicit none
+  integer                       :: n_real_tc 
+  integer                       :: i, j, k
+  double precision              :: accu_d, accu_nd, accu_tmp
+  double precision              :: norm
+  double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
+
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
+
+  PROVIDE Fock_matrix_tc_mo_tot
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! level shift: half shift on orbitals elec_beta_num+1..elec_alpha_num, full shift above
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
+                     , n_real_tc, eigval_right_tmp )
+
+  !if(max_ov_tc_scf)then
+  ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo                          & 
+  !                    , n_real_tc, eigval_right_tmp )
+  !else 
+  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
+  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
+  !                    , n_real_tc, eigval_right_tmp )
+  !endif
+
+  deallocate(F_tmp)
+
+
+!  if(n_real_tc .ne. mo_num)then
+!   print*,'n_real_tc ne mo_num ! ',n_real_tc
+!   stop
+!  endif
+
+  eigval_fock_tc_mo = eigval_right_tmp
+!  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
+!  do i = 1, elec_alpha_num
+!    print*, i, eigval_fock_tc_mo(i)
+!  enddo
+!  do i = elec_alpha_num+1, mo_num 
+!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
+!  enddo
+  deallocate( eigval_right_tmp )
+
+  ! L.T x R 
+  call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
+            , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+            , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+  ! ---
+  ! accu_d: sum of |diagonal| of the overlap ; accu_nd: sum of squared off-diagonal elements
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    do k = 1, mo_num
+      if(i==k) then
+        accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+        accu_d += dabs(accu_tmp )
+      else
+        accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+        accu_nd += accu_tmp * accu_tmp
+        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+         print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+        endif
+      endif
+    enddo 
+  enddo
+  accu_nd = dsqrt(accu_nd) / accu_d
+  if(accu_nd .gt. thresh_biorthog_nondiag) then
+    print *, ' bi-orthog failed'
+    print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+    print *, ' overlap_fock_tc_eigvec_mo = '
+    do i = 1, mo_num
+      write(*,'(100(F16.10,1X))') overlap_fock_tc_eigvec_mo(i,:)
+    enddo
+    stop
+  endif
+
+  ! ---
+  ! renormalize the vectors when the averaged diagonal of the overlap deviates from 1
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thresh_biorthog_diag
+    print *, ' normalizing vectors ...'
+    do i = 1, mo_num
+      norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
+      if(norm .gt. thresh_biorthog_diag) then
+        do k = 1, mo_num
+          fock_tc_reigvec_mo(k,i) *= 1.d0/norm
+          fock_tc_leigvec_mo(k,i) *= 1.d0/norm
+        enddo
+      endif
+    enddo
+
+    call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
+              , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+              , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+              , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d
+    if(accu_nd .gt. thresh_biorthog_nondiag) then
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,1X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
+  endif
+ 
+  ! ---
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
+
+  BEGIN_DOC
+  ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP (L^T x ao_overlap x R)
+  ! The vectors are mo_r_coef x fock_tc_reigvec_mo and mo_l_coef x fock_tc_leigvec_mo, rescaled below.
+  ! THE OVERLAP SHOULD BE THE SAME AS overlap_fock_tc_eigvec_mo
+  END_DOC
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: norm
+  double precision, allocatable :: tmp(:,:)
+
+  PROVIDE mo_l_coef mo_r_coef
+
+   ! MO_R x R
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_r_coef, size(mo_r_coef, 1)                   &
+             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
+             , 0.d0, fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) )
+
+   ! MO_L x L
+   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+             , mo_l_coef, size(mo_l_coef, 1)                   &
+             , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
+             , 0.d0, fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1) )
+ 
+  allocate( tmp(mo_num,ao_num) )
+
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_ao, size(overlap_fock_tc_eigvec_ao, 1) )
+
+  deallocate( tmp )
+
+  ! ---
+  ! scale column i of both sets by 1/sqrt(|S_ii|); the row index spans the ao_num AO components
+  do i = 1, mo_num
+   norm = 1.d0/dsqrt(dabs(overlap_fock_tc_eigvec_ao(i,i)))
+   do j = 1, ao_num
+    fock_tc_reigvec_ao(j,i) *= norm
+    fock_tc_leigvec_ao(j,i) *= norm
+   enddo
+  enddo
+
+  allocate( tmp(mo_num,ao_num) )
+
+  ! tmp <-- L.T x S_ao  (recomputed with the rescaled vectors)
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , fock_tc_leigvec_ao, size(fock_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), fock_tc_reigvec_ao, size(fock_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_fock_tc_eigvec_ao, size(overlap_fock_tc_eigvec_ao, 1) )
+
+  deallocate( tmp )
+
+END_PROVIDER
+
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..ff1077f5
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,186 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ]
+  BEGIN_DOC
+  ! threshold_DIIS_TCSCF, replaced by dsqrt(thresh_tcscf) when it is exactly zero
+  END_DOC
+  implicit none
+  if(threshold_DIIS_TCSCF /= 0.d0) then
+    threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF
+  else
+    threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf)
+  endif
+  ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0)
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! Alpha part of Q_matrix:
+  !
+  ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  END_DOC
+
+  implicit none
+
+  Q_alpha(:,:) = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num &
+            , 1.d0, mo_r_coef, size(mo_r_coef, 1)      &
+            , mo_l_coef, size(mo_l_coef, 1)            &
+            , 0.d0, Q_alpha, size(Q_alpha, 1) )
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! Beta part of Q_matrix:
+  !
+  ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  END_DOC
+
+  implicit none
+
+  Q_beta(:,:) = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num &
+            , 1.d0, mo_r_coef, size(mo_r_coef, 1)     &
+            , mo_l_coef, size(mo_l_coef, 1)           &
+            , 0.d0, Q_beta, size(Q_beta, 1) )
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! Sum of the two spin parts: Q_alpha + Q_beta (twice Q_alpha when elec_alpha_num = elec_beta_num)
+  ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  ! 
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |     
+  !                        | 0   otherwise
+  !
+  ! the diis error is defined as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      at convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0  
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+
+  if(elec_alpha_num /= elec_beta_num) then
+    Q_matrix = Q_alpha + Q_beta
+  else
+    Q_matrix = 2.d0 * Q_alpha
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+  BEGIN_DOC
+  ! F x Q x S - S x Q x F  with F = Fock_matrix_tc_ao_tot, Q = Q_matrix, S = ao_overlap
+  END_DOC
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+  allocate(tmp(ao_num,ao_num))
+
+  ! tmp <-- F x Q
+  call dgemm( "N", "N", ao_num, ao_num, ao_num, 1.d0                 &
+            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1)  &
+            , Q_matrix, size(Q_matrix, 1), 0.d0, tmp, size(tmp, 1) )
+
+  ! FQS_SQF_ao <-- tmp x S = F x Q x S
+  call dgemm( "N", "N", ao_num, ao_num, ao_num, 1.d0 &
+            , tmp, size(tmp, 1)                      &
+            , ao_overlap, size(ao_overlap, 1), 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  ! tmp <-- S x Q
+  tmp(:,:) = 0.d0
+  call dgemm( "N", "N", ao_num, ao_num, ao_num, 1.d0 &
+            , ao_overlap, size(ao_overlap, 1)        &
+            , Q_matrix, size(Q_matrix, 1), 0.d0, tmp, size(tmp, 1) )
+
+  ! FQS_SQF_ao <-- FQS_SQF_ao - tmp x F = F x Q x S - S x Q x F
+  call dgemm( "N", "N", ao_num, ao_num, ao_num, -1.d0                                  &
+            , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  deallocate(tmp)
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+  ! FQS_SQF_ao handed to ao_to_mo_bi_ortho; presumably its MO-basis counterpart (TODO confirm)
+  implicit none
+
+  call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) &
+                        , FQS_SQF_mo, size(FQS_SQF_mo, 1) )
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+~                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..fccfd837
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,405 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+  ! Sum over i, j = 1..elec_beta_num only; fock_3e_uhf_mo_a and fock_3e_uhf_mo_b start from this matrix.
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+  ! ti / tf only feed the commented-out timing print at the end
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+      ! both summation indices run over the first elec_beta_num orbitals
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+          ! (b,i,j) in the first three slots, the six orderings of (a,i,j) in the last three
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+          ! weights: +4 (aij), +1 (ija, jai), -2 (aji, iaj, jia), overall factor -1/2
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+  ! fock_3e_uhf_mo_cs plus the terms where i and/or j run over elec_beta_num+1..elec_alpha_num
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+  ! o: first index above the elec_beta_num lowest orbitals
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+      ! j in o..elec_alpha_num, i in 1..elec_beta_num
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+      ! j in 1..elec_beta_num, i in o..elec_alpha_num
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+      ! both i and j in o..elec_alpha_num
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Starts from fock_3e_uhf_mo_cs and adds the terms in which i and/or j run
+  ! over elec_beta_num+1..elec_alpha_num.
+  !
+  ! Each term is -0.5 x a combination of give_integrals_3_body_bi_ort values
+  ! with (b,i,j) in the first three slots and a permutation of (a,i,j) in the
+  ! last three. Only the permutations that enter a given update are evaluated;
+  ! this assumes give_integrals_3_body_bi_ort only sets its last argument.
+  END_DOC
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! ---
+      ! j in o..elec_alpha_num, i in 1..elec_beta_num
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! ---
+      ! j in 1..elec_beta_num, i in o..elec_alpha_num
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+      ! both i and j in o..elec_alpha_num
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Contraction of give_integrals_3_body_bi_ort_ao values with TCSCF_bi_ort_dm_ao_alpha / _beta.
+  ! Equations (B6) and (B7)
+  ! NOTE(review): the reference these equation numbers belong to is not cited here.
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+  ! f_tmp is PRIVATE: each thread allocates and fills its own accumulator
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+  ! the work-sharing directive applies to the outermost loop (g) only
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+  ! thread-local results are summed into the shared array one thread at a time
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Contraction of give_integrals_3_body_bi_ort_ao values with TCSCF_bi_ort_dm_ao_alpha / _beta.
+  ! Equations (B6) and (B7)
+  ! NOTE(review): the reference these equation numbers belong to is not cited here.
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+  ! f_tmp is PRIVATE: each thread allocates and fills its own accumulator
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+  ! the work-sharing directive applies to the outermost loop (g) only
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+  ! thread-local results are summed into the shared array one thread at a time
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_scf/fock_for_right.irp.f b/src/tc_scf/fock_for_right.irp.f
new file mode 100644
index 00000000..5a51b324
--- /dev/null
+++ b/src/tc_scf/fock_for_right.irp.f
@@ -0,0 +1,107 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, good_hermit_tc_fock_mat, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! good_hermit_tc_fock_mat = symmetric matrix built from the lower triangle (diagonal included) of Fock_matrix_tc_mo_tot
+!
+! Original note: the converged eigenvectors of such a matrix yield orthonormal vectors satisfying the left Brillouin theorem
+  END_DOC
+  implicit none
+  integer :: row, col
+
+  ! element (row,col) always reads the lower-triangle entry F(max(row,col), min(row,col))
+  do col = 1, mo_num
+    do row = 1, mo_num
+      good_hermit_tc_fock_mat(row,col) = Fock_matrix_tc_mo_tot(max(row,col), min(row,col))
+    enddo
+  enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, hermit_average_tc_fock_mat, (mo_num, mo_num)]
+
+  BEGIN_DOC
+! hermit_average_tc_fock_mat = (F + F^T)/2, with F = Fock_matrix_tc_mo_tot (real matrix)
+  END_DOC
+  implicit none
+  integer :: p, q
+
+  ! every element is assigned in the double loop, so no preliminary copy of F is required
+  do q = 1, mo_num
+    do p = 1, mo_num
+      hermit_average_tc_fock_mat(p,q) = 0.5d0 * (Fock_matrix_tc_mo_tot(q,p) + Fock_matrix_tc_mo_tot(p,q))
+    enddo
+  enddo
+
+END_PROVIDER 
+
+
+! ---
+BEGIN_PROVIDER [ double precision, grad_hermit]
+ implicit none
+ BEGIN_DOC
+ ! gradient measure of the hermitized TC Fock matrix selected by symetric_fock_tc
+ END_DOC
+ if(.not. symetric_fock_tc) then
+  grad_hermit = grad_good_hermit_tc_fock_mat
+ else
+  grad_hermit = grad_hermit_average_tc_fock_mat
+ endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, grad_good_hermit_tc_fock_mat]
+  implicit none
+  BEGIN_DOC
+  ! grad_good_hermit_tc_fock_mat = sum of |good_hermit_tc_fock_mat(i,a)| for i <= elec_alpha_num < a <= mo_num
+  END_DOC
+  integer :: i_occ, a_virt
+  grad_good_hermit_tc_fock_mat = 0.d0
+  do i_occ = 1, elec_alpha_num
+    do a_virt = elec_alpha_num+1, mo_num
+      grad_good_hermit_tc_fock_mat = grad_good_hermit_tc_fock_mat + dabs(good_hermit_tc_fock_mat(i_occ,a_virt))
+    enddo
+  enddo
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad_hermit_average_tc_fock_mat]
+  implicit none
+  BEGIN_DOC
+  ! grad_hermit_average_tc_fock_mat = sum of |hermit_average_tc_fock_mat(i,a)| for i <= elec_alpha_num < a <= mo_num
+  END_DOC
+  integer :: i_occ, a_virt
+  grad_hermit_average_tc_fock_mat = 0.d0
+  do i_occ = 1, elec_alpha_num
+    do a_virt = elec_alpha_num+1, mo_num
+      grad_hermit_average_tc_fock_mat = grad_hermit_average_tc_fock_mat + dabs(hermit_average_tc_fock_mat(i_occ,a_virt))
+    enddo
+  enddo
+END_PROVIDER 
+
+
+! ---
+
+subroutine save_good_hermit_tc_eigvectors()
+  implicit none
+  BEGIN_DOC
+  ! Calls mo_as_eigvectors_of_mo_matrix on the hermitized TC Fock matrix selected by symetric_fock_tc
+  END_DOC
+  integer        :: sgn
+  logical        :: verbose
+  character*(64) :: mo_tag
+  sgn     = 1
+  verbose = .False.
+  mo_tag  = "Canonical"
+  if(.not. symetric_fock_tc) then
+   call mo_as_eigvectors_of_mo_matrix(good_hermit_tc_fock_mat, mo_num, mo_num, mo_tag, sgn, verbose)
+  else
+   call mo_as_eigvectors_of_mo_matrix(hermit_average_tc_fock_mat, mo_num, mo_num, mo_tag, sgn, verbose)
+  endif
+end subroutine save_good_hermit_tc_eigvectors
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
new file mode 100644
index 00000000..6796666d
--- /dev/null
+++ b/src/tc_scf/fock_tc.irp.f
@@ -0,0 +1,307 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  ! Computed with plain sequential loops (there is no OpenMP directive in this provider).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: density, density_a, density_b
+  double precision :: t0, t1
+
+  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
+  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
+
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do j = 1, ao_num
+        do l = 1, ao_num
+
+          density_a = TCSCF_density_matrix_ao_alpha(l,j)
+          density_b = TCSCF_density_matrix_ao_beta (l,j)
+          density   = density_a + density_b
+          ! --- disabled variants using other index conventions of ao_two_e_tc_tot (kept for reference) ---
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+          ! --- active contraction: total density with (k,i,l,j), spin density with i and j swapped ---
+          !                                         rho(l,j)   *      < k l| T | i j>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho(l,j)   *      < k l| T | i j>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !                                         rho_b(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  ! (k,i) = sum_{l,j} [ rho(l,j) * T(k,i,l,j) - rho_sigma(l,j) * T(k,j,l,i) ], T = ao_two_e_tc_tot, rho = rho_a + rho_b
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  !print*, ' providing two_e_tc_non_hermit_integral ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_alpha = 0.d0
+  two_e_tc_non_hermit_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                        &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)              &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, &
+ !$OMP         two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta)
+  ! tmp_a / tmp_b are PRIVATE : each thread allocates and fills its own partial sums
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_tc_tot(k,i,l,j)
+          I_kjli = ao_two_e_tc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+  ! the partial sums of each thread are added to the shared result inside a CRITICAL section
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Alpha TC Fock matrix on the AO basis :
+  !
+  ! ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha
+  END_DOC
+
+  implicit none
+  Fock_matrix_tc_ao_alpha = two_e_tc_non_hermit_integral_alpha + ao_one_e_integrals_tc_tot
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Beta TC Fock matrix on the AO basis :
+  !
+  ! ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta
+  END_DOC
+
+  implicit none
+  Fock_matrix_tc_ao_beta = two_e_tc_non_hermit_integral_beta + ao_one_e_integrals_tc_tot
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
+
+  BEGIN_DOC
+  ! Alpha TC Fock matrix on the MO basis, obtained from Fock_matrix_tc_ao_alpha.
+  !
+  ! bi_ortho = False : AO --> MO transformation done by ao_to_mo
+  !
+  ! bi_ortho = True  : transformation done by ao_to_mo_bi_ortho, then fock_3e_uhf_mo_a is
+  !                    added when three_body_h_tc is True
+  END_DOC
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(.not. bi_ortho) then
+
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                 , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
+  else
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
+    ! disabled alternatives: adding fock_3e_uhf_ao_a on the AO basis before the transformation,
+    ! or adding fock_a_tot_3e_bi_orth instead of fock_3e_uhf_mo_a
+    if(three_body_h_tc) then
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
+    endif
+
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
+
+  BEGIN_DOC
+  ! Beta TC Fock matrix on the MO basis, obtained from Fock_matrix_tc_ao_beta.
+  !
+  ! bi_ortho = False : AO --> MO transformation done by ao_to_mo
+  !
+  ! bi_ortho = True  : transformation done by ao_to_mo_bi_ortho, then fock_3e_uhf_mo_b is
+  !                    added when three_body_h_tc is True
+  END_DOC
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(.not. bi_ortho) then
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                 , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
+  else
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
+    ! disabled alternatives: adding fock_3e_uhf_ao_b on the AO basis before the transformation,
+    ! or adding fock_b_tot_3e_bi_orth instead of fock_3e_uhf_mo_b
+    if(three_body_h_tc) then
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
+    endif
+
+  endif
+
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
+&BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
+&BEGIN_PROVIDER [ double precision, grad_non_hermit]
+
+  BEGIN_DOC
+  ! grad_non_hermit_left  = max |Fock_matrix_tc_mo_tot(k,i)| over the pairs (i,k) listed below
+  !
+  ! grad_non_hermit_right = max |Fock_matrix_tc_mo_tot(i,k)| over the same pairs
+  !
+  ! grad_non_hermit       = grad_non_hermit_left + grad_non_hermit_right
+  !
+  ! pairs (i,k) : doc --> SOMO, doc --> virt and SOMO --> virt
+  END_DOC
+
+  implicit none
+  integer :: i, k
+
+  grad_non_hermit_left  = 0.d0
+  grad_non_hermit_right = 0.d0
+
+  ! The three blocks use the same measure, the running maximum of the absolute values.
+  ! Squared elements must not be accumulated on top of the maximum: mixing both gives a
+  ! quantity that is neither a max-norm nor a sum of squares.
+  do i = 1, elec_beta_num ! doc --> SOMO
+    do k = elec_beta_num+1, elec_alpha_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = 1, elec_beta_num ! doc --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
+    do k = elec_alpha_num+1, mo_num
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+    enddo
+  enddo
+
+  grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+  BEGIN_DOC
+  ! Fock_matrix_tc_mo_tot transformed to the AO basis by mo_to_ao_bi_ortho
+  END_DOC
+  implicit none
+  call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1), &
+                          Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) )
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
new file mode 100644
index 00000000..2f33cd17
--- /dev/null
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -0,0 +1,144 @@
+
+ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_tc_diag_mo_tot, (mo_num)]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis.
+   ! For open shells, the ROHF Fock Matrix is ::
+   !
+   !       |   F-K    |  F + K/2  |    F     |
+   !       |---------------------------------|
+   !       | F + K/2  |     F     |  F - K/2 |
+   !       |---------------------------------|
+   !       |    F     |  F - K/2  |  F + K   |
+   !
+   ! Blocks (rows and columns) : 1..elec_beta_num | elec_beta_num+1..elec_alpha_num | elec_alpha_num+1..mo_num
+   !
+   ! F = 1/2 (Fa + Fb)
+   !
+   ! K = Fb - Fa
+   !
+   ! Fa = Fock_matrix_tc_mo_alpha, Fb = Fock_matrix_tc_mo_beta. Closed shell : Fock_matrix_tc_mo_tot = Fa.
+   END_DOC
+   integer                        :: i,j,n
+   if (elec_alpha_num == elec_beta_num) then
+     Fock_matrix_tc_mo_tot = Fock_matrix_tc_mo_alpha
+   else
+
+     do j=1,elec_beta_num
+       ! F-K
+       do i=1,elec_beta_num !CC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F+K/2
+       do i=elec_beta_num+1,elec_alpha_num  !CA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_alpha_num+1, mo_num !CV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+     enddo
+
+     do j=elec_beta_num+1,elec_alpha_num
+       ! F+K/2
+       do i=1,elec_beta_num !AC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_beta_num+1,elec_alpha_num !AA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_alpha_num+1, mo_num !AV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+     enddo
+
+     do j=elec_alpha_num+1, mo_num
+       ! F
+       do i=1,elec_beta_num !VC
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_beta_num+1,elec_alpha_num !VA
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+       ! F+K
+       do i=elec_alpha_num+1,mo_num !VV
+         Fock_matrix_tc_mo_tot(i,j) = 0.5d0*(Fock_matrix_tc_mo_alpha(i,j)+Fock_matrix_tc_mo_beta(i,j)) &
+             + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
+       enddo
+     enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
+
+   endif
+
+   do i = 1, mo_num
+     Fock_matrix_tc_diag_mo_tot(i) = Fock_matrix_tc_mo_tot(i,i)
+   enddo
+   ! NOTE(review): the diagonal is copied here, before fock_3_mat is added at the end of this provider -- confirm intended
+
+   if(frozen_orb_scf)then
+     integer                        :: iorb,jorb
+     do i = 1, n_core_orb
+      iorb = list_core(i)
+      do j = 1, n_act_orb
+       jorb = list_act(j)
+       Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+       Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+      enddo
+     enddo
+   endif
+
+   if(no_oa_or_av_opt)then
+     do i = 1, n_act_orb
+       iorb = list_act(i)
+       do j = 1, n_inact_orb
+         jorb = list_inact(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_virt_orb
+         jorb = list_virt(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_core_orb
+         jorb = list_core(j)
+         Fock_matrix_tc_mo_tot(iorb,jorb) = 0.d0
+         Fock_matrix_tc_mo_tot(jorb,iorb) = 0.d0                                                                                                                 
+       enddo
+     enddo
+   endif
+  if(.not.bi_ortho .and. three_body_h_tc)then
+   Fock_matrix_tc_mo_tot += fock_3_mat
+  endif
+
+END_PROVIDER
+
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
new file mode 100644
index 00000000..424eeffd
--- /dev/null
+++ b/src/tc_scf/fock_three.irp.f
@@ -0,0 +1,229 @@
+BEGIN_PROVIDER [ double precision, fock_3_mat, (mo_num, mo_num)] 
+ implicit none
+ BEGIN_DOC
+ ! fock_3_mat(j,i) = - contrib, where contrib is returned by give_fock_ia_three_e_total(j,i,contrib)
+ !
+ ! Filled only when three_body_h_tc is True and bi_ortho is False; zero otherwise.
+ END_DOC
+  integer          :: row, col
+  double precision :: val
+
+  fock_3_mat = 0.d0
+
+  if(three_body_h_tc .and. (.not.bi_ortho)) then
+
+   ! single call issued before the double loop (its result is overwritten below)
+   call give_fock_ia_three_e_total(1,1,val)
+
+   ! sequential double loop: no OpenMP directive is active here
+   do col = 1, mo_num
+    do row = 1, mo_num
+     call give_fock_ia_three_e_total(row,col,val)
+     fock_3_mat(row,col) = -val
+    enddo
+   enddo
+
+  endif
+
+
+END_PROVIDER 
+
+
+subroutine give_fock_ia_three_e_total(i,a,contrib)
+ implicit none
+ BEGIN_DOC
+! contrib is the TOTAL (same spins / opposite spins) contribution from the three body term to the Fock operator 
+!
+! contrib = int_1 + int_2 + int_3, each one summed over the n_points_final_grid grid points and over mm = 1, 3
+ END_DOC
+ integer, intent(in) :: i,a
+ double precision, intent(out) :: contrib
+ double precision :: int_1, int_2, int_3
+ double precision :: mos_i, mos_a, w_ia
+ double precision :: mos_ia, weight
+
+ integer :: mm, ipoint,k,l
+ ! the three partial sums are accumulated separately and added only at the end
+ int_1 = 0.d0
+ int_2 = 0.d0
+ int_3 = 0.d0
+ do mm = 1, 3
+  do ipoint = 1, n_points_final_grid
+   weight = final_weight_at_r_vector(ipoint)                                                                          
+   mos_i  = mos_in_r_array_transp(ipoint,i) 
+   mos_a  = mos_in_r_array_transp(ipoint,a) 
+   mos_ia = mos_a * mos_i
+   w_ia   = x_W_ij_erf_rk(ipoint,mm,i,a) 
+     
+   int_1  += weight * fock_3_w_kk_sum(ipoint,mm) * (4.d0 * fock_3_rho_beta(ipoint) * w_ia               & 
+                                                  + 2.0d0 * mos_ia * fock_3_w_kk_sum(ipoint,mm)         & 
+                                                  - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,i) * mos_a      & 
+                                                  - 2.0d0 * fock_3_w_ki_mos_k(ipoint,mm,a) * mos_i      )
+   int_2  += weight * (-1.d0) * ( 2.0d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * w_ia                     & 
+                                + 2.0d0 * fock_3_rho_beta(ipoint) * fock_3_w_ki_wk_a(ipoint,mm,i,a)   & 
+                                + 1.0d0 * mos_ia * fock_3_trace_w_tilde(ipoint,mm)                    )
+
+   int_3  += weight *   1.d0  * (fock_3_w_kl_wla_phi_k(ipoint,mm,i) * mos_a + fock_3_w_kl_wla_phi_k(ipoint,mm,a) * mos_i & 
+                                +fock_3_w_ki_mos_k(ipoint,mm,i)     * fock_3_w_ki_mos_k(ipoint,mm,a)                     )
+  enddo
+ enddo
+ contrib = int_1 + int_2 + int_3
+
+end
+
+! ---
+
+BEGIN_PROVIDER [double precision, diag_three_elem_hf]
+  ! Scalar three-body term: 0 if three_body_h_tc is False, otherwise computed by one of the two branches below
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
+  else
+
+    if(.not. bi_ortho) then
+
+      ! --- bi_ortho = False : triple sum of give_integrals_3_body over k,j,i <= elec_beta_num, then a grid sum, then a sign flip
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+      ! bi_ortho = True : sum of the four values returned by give_aaa/aab/abb/bbb_contrib
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
+
+    endif
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
+ implicit none 
+ integer :: h,p,i,j
+ double precision :: direct_int, exch_int, exchange_int_231, exchange_int_312
+ double precision :: exchange_int_23, exchange_int_12, exchange_int_13 
+ ! fock_3_mat_a_op_sh(h,p) = minus the sums, over i <= elec_beta_num < j <= elec_alpha_num, of the integral combinations below
+ fock_3_mat_a_op_sh = 0.d0
+ do h = 1, mo_num
+  do p = 1, mo_num
+   !F_a^{ab}(h,p) 
+   do i = 1, elec_beta_num ! beta 
+    do j = elec_beta_num+1, elec_alpha_num ! alpha
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)    ! <hji|pji>
+     call  give_integrals_3_body(h,j,i,j,p,i,exch_int)   
+     fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int
+    enddo
+   enddo
+   !F_a^{aa}(h,p)
+   do i = 1, elec_beta_num ! alpha 
+    do j = elec_beta_num+1, elec_alpha_num ! alpha
+       ! direct_int comes from the call below; it is not read from three_body_4_index (that value was never used)
+       call  give_integrals_3_body(h,j,i,p,j,i,direct_int) 
+       call  give_integrals_3_body(h,j,i,i,p,j,exchange_int_231)
+       call  give_integrals_3_body(h,j,i,j,i,p,exchange_int_312) 
+       call  give_integrals_3_body(h,j,i,p,i,j,exchange_int_23) 
+       call  give_integrals_3_body(h,j,i,i,j,p,exchange_int_12)
+       call  give_integrals_3_body(h,j,i,j,p,i,exchange_int_13)  
+       fock_3_mat_a_op_sh(h,p) -= ( direct_int + exchange_int_231 + exchange_int_312 & 
+              -  exchange_int_23 & ! i <-> j
+              -  exchange_int_12 & ! p <-> j
+              -  exchange_int_13  )! p <-> i
+    enddo 
+   enddo
+  enddo
+ enddo
+! symmetrized 
+! do p = 1, elec_beta_num
+!  do h = elec_alpha_num +1, mo_num
+!   fock_3_mat_a_op_sh(h,p) = fock_3_mat_a_op_sh(p,h)
+!  enddo
+! enddo
+ 
+! do h = elec_beta_num+1, elec_alpha_num
+!  do p = elec_alpha_num +1, mo_num
+!   !F_a^{bb}(h,p) 
+!   do i = 1, elec_beta_num
+!    do j = i+1, elec_beta_num
+!     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+!     call  give_integrals_3_body(h,j,i,p,i,j,exch_int)   
+!     fock_3_mat_a_op_sh(h,p) -= direct_int - exch_int
+!    enddo
+!   enddo
+!  enddo
+! enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_3_mat_b_op_sh, (mo_num, mo_num)]
+ implicit none 
+ ! Only the block h <= elec_beta_num, p > elec_alpha_num is filled; every other element stays zero.
+ integer :: h,p,i,j
+ double precision :: direct_int, exch_int
+ fock_3_mat_b_op_sh = 0.d0
+ do h = 1, elec_beta_num
+  do p = elec_alpha_num +1, mo_num
+   !F_b^{aa}(h,p) 
+   do i = 1, elec_beta_num
+    do j = elec_beta_num+1, elec_alpha_num
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+     call  give_integrals_3_body(h,j,i,p,i,j,exch_int)   
+     fock_3_mat_b_op_sh(h,p) += direct_int - exch_int
+    enddo
+   enddo
+   !F_b^{ab}(h,p) : i runs over the orbitals elec_beta_num+1 .. elec_alpha_num, j over 1 .. elec_beta_num
+   do i = elec_beta_num+1, elec_alpha_num
+    do j = 1, elec_beta_num
+     call  give_integrals_3_body(h,j,i,p,j,i,direct_int)   
+     call  give_integrals_3_body(h,j,i,j,p,i,exch_int)   
+     fock_3_mat_b_op_sh(h,p) += direct_int - exch_int
+    enddo
+   enddo
+  enddo
+ enddo
+ ! NOTE: an upper bound of elec_beta_num for the i loop of F_b^{ab} gives an empty range (term never added)
+
+END_PROVIDER 
diff --git a/src/tc_scf/fock_three_bi_ortho.irp.f b/src/tc_scf/fock_three_bi_ortho.irp.f
new file mode 100644
index 00000000..279670b8
--- /dev/null
+++ b/src/tc_scf/fock_three_bi_ortho.irp.f
@@ -0,0 +1,178 @@
+BEGIN_PROVIDER [ double precision, fock_a_abb_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_a_abb_3e_bi_orth_old(a,i) = - sum_{j < k <= elec_beta_num} ( < a k j | i k j > - < a k j | i j k > )
+!
+! alpha,beta,beta contribution to the bi-ortho 3-e Fock matrix for alpha electrons (see contrib_3e_soo)
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_23_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_beta_num
+    do k = j+1, elec_beta_num
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+      acc += direct_int - exch_23_int 
+    enddo
+   enddo
+   fock_a_abb_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_a_aba_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_a_aba_3e_bi_orth_old(a,i) = - sum_{j <= elec_alpha_num, k <= elec_beta_num} ( < a k j | i k j > - < a k j | j k i > )
+!
+! bi-ortho 3-e Fock matrix for alpha electrons, contribution with the spin pattern a b a (j alpha, k beta)
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_13_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_alpha_num ! a
+    do k = 1, elec_beta_num ! b
+                                                                       !   a b a   a b a
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+      acc += direct_int - exch_13_int 
+    enddo
+   enddo
+   fock_a_aba_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_a_aaa_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_a_aaa_3e_bi_orth_old(a,i) = - sum_{j < k <= elec_alpha_num} ( direct + two cyclic terms - three exchange terms )
+!
+! alpha,alpha,alpha contribution to the bi-ortho 3-e Fock matrix for alpha electrons
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_alpha_num
+    do k = j+1, elec_alpha_num
+      ! positive terms :: direct and cyclic permutations
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+      acc += direct_int + c_3_int + c_minus_3_int 
+      ! negative terms :: single exchanges
+      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+      acc += - exch_13_int - exch_23_int  - exch_12_int 
+    enddo
+   enddo
+   fock_a_aaa_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! fock_a_tot_3e_bi_orth_old = abb + aba + aaa contributions to the bi-ortho 3-e Fock matrix for alpha electrons
+ END_DOC
+ fock_a_tot_3e_bi_orth_old = fock_a_abb_3e_bi_orth_old + fock_a_aba_3e_bi_orth_old
+ fock_a_tot_3e_bi_orth_old = fock_a_tot_3e_bi_orth_old + fock_a_aaa_3e_bi_orth_old
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_b_baa_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_b_baa_3e_bi_orth_old(a,i) = - sum_{j < k <= elec_alpha_num} ( < a k j | i k j > - < a k j | i j k > )
+!
+! beta,alpha,alpha contribution to the bi-ortho 3-e Fock matrix for beta electrons
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_23_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_alpha_num
+    do k = j+1, elec_alpha_num
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+      acc += direct_int - exch_23_int 
+    enddo
+   enddo
+   fock_b_baa_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_b_bab_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_b_bab_3e_bi_orth_old(a,i) = - sum_{j <= elec_beta_num, k <= elec_alpha_num} ( < a k j | i k j > - < a k j | j k i > )
+!
+! beta,alpha,beta contribution to the bi-ortho 3-e Fock matrix for beta electrons
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_13_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_beta_num
+    do k = 1, elec_alpha_num
+      !                                                                    b a b   b a b
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13
+      acc += direct_int - exch_13_int 
+    enddo
+   enddo
+   fock_b_bab_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_b_bbb_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! fock_b_bbb_3e_bi_orth_old(a,i) = - sum_{j < k <= elec_beta_num} ( direct + two cyclic terms - three exchange terms )
+!
+! beta,beta,beta contribution to the bi-ortho 3-e Fock matrix for beta electrons
+ END_DOC
+ integer :: i,a,j,k
+ double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int, acc
+ do i = 1, mo_num
+  do a = 1, mo_num
+   acc = 0.d0
+   do j = 1, elec_beta_num
+    do k = j+1, elec_beta_num
+      ! positive terms :: direct and cyclic permutations
+      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+      acc += direct_int + c_3_int + c_minus_3_int 
+      ! negative terms :: single exchanges
+      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+      acc += - exch_13_int - exch_23_int  - exch_12_int 
+    enddo
+   enddo
+   fock_b_bbb_3e_bi_orth_old(a,i) = - acc
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, fock_b_tot_3e_bi_orth_old, (mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! fock_b_tot_3e_bi_orth_old = bbb + bab + baa contributions to the bi-ortho 3-e Fock matrix for beta electrons
+ END_DOC
+ fock_b_tot_3e_bi_orth_old = fock_b_bbb_3e_bi_orth_old + fock_b_bab_3e_bi_orth_old
+ fock_b_tot_3e_bi_orth_old = fock_b_tot_3e_bi_orth_old + fock_b_baa_3e_bi_orth_old
+END_PROVIDER 
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
new file mode 100644
index 00000000..f73171a3
--- /dev/null
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -0,0 +1,286 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! fock_a_tot_3e_bi_orth(a,i) = fock_cs_3e_bi_orth(a,i) + fock_a_tmp1_bi_ortho(a,i) + fock_a_tmp2_bi_ortho(a,i)
+  END_DOC
+  implicit none
+  integer          :: i, a
+  double precision :: tmp
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = fock_cs_3e_bi_orth(a,i) + fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) = tmp + fock_a_tmp2_bi_ortho(a,i)
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! fock_b_tot_3e_bi_orth(a,i) = fock_cs_3e_bi_orth(a,i) + fock_b_tmp2_bi_ortho(a,i) + fock_b_tmp1_bi_ortho(a,i)
+  END_DOC
+  implicit none
+  integer          :: i, a
+  double precision :: tmp
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = fock_cs_3e_bi_orth(a,i) + fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) = tmp + fock_b_tmp1_bi_ortho(a,i)
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Part of the 3-e Fock matrix in which both summed orbitals j, k run over 1..elec_beta_num.
+  ! Each (j,k) pair contributes
+  !   2 <akj|ikj> + 0.5 ( <akj|jik> + <akj|kji> - <akj|kij> ) - 1.5 <akj|jki> - <akj|ijk>
+  ! and the accumulated sum is stored with an overall minus sign. Algebraically this is the sum of
+  ! 0.5 * (contrib_3e_sss + contrib_3e_soo) + contrib_3e_sos, which already carry that minus sign.
+  END_DOC
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, c_3_int, c_minus_3_int
+  double precision :: exch_13_int, exch_23_int, exch_12_int
+  double precision :: tmp
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+
+      tmp = 0.d0
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+          ! direct and cyclic integrals
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int   ) ! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int      ) ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int) ! < a k j | k j i >
+          ! exchange integrals
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int  ) ! < a k j | j k i > : E_13
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int  ) ! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int  ) ! < a k j | k i j > : E_12
+
+          tmp = tmp + ( 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int )
+        enddo
+      enddo
+
+      ! store with the overall minus sign
+      fock_cs_3e_bi_orth(a,i) = -tmp
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! Term of fock_a_tot_3e_bi_orth with j in elec_beta_num+1..elec_alpha_num and k in 1..elec_beta_num.
+  ! Each (j,k) pair contributes, with an overall minus sign,
+  !   1.5 ( <akj|ikj> - <akj|jki> ) + 0.5 ( <akj|jik> + <akj|kji> - <akj|ijk> - <akj|kij> )
+  END_DOC
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, c_3_int, c_minus_3_int
+  double precision :: exch_13_int, exch_23_int, exch_12_int
+  double precision :: tmp
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = 0.d0
+      do j = elec_beta_num + 1, elec_alpha_num
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int   ) ! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int      ) ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int) ! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int  ) ! < a k j | j k i > : E_13
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int  ) ! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int  ) ! < a k j | k i j > : E_12
+          tmp = tmp + ( 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int) )
+        enddo
+      enddo
+      fock_a_tmp1_bi_ortho(a,i) = -tmp
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! Term of fock_a_tot_3e_bi_orth: 0.5 * contrib_3e_sss(a,i,j,k) summed over
+  ! j in 1..elec_alpha_num and k in elec_beta_num+1..elec_alpha_num.
+  END_DOC
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, tmp
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = 0.d0
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+          tmp = tmp + 0.5d0 * contrib_sss
+        enddo
+      enddo
+      fock_a_tmp2_bi_ortho(a,i) = tmp
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! Term of fock_b_tot_3e_bi_orth with j in 1..elec_beta_num and k in elec_beta_num+1..elec_alpha_num.
+  ! Each (j,k) pair contributes, with an overall minus sign,
+  !   1.5 <akj|ikj> - 0.5 <akj|ijk> - <akj|jki>
+  END_DOC
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int
+  double precision :: tmp
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = 0.d0
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int ) ! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int) ! < a k j | j k i > : E_13
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int) ! < a k j | i j k > : E_23
+          tmp = tmp + ( 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int )
+        enddo
+      enddo
+      fock_b_tmp1_bi_ortho(a,i) = -tmp
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! Term of fock_b_tot_3e_bi_orth: 0.5 * contrib_3e_soo(a,i,j,k) summed over
+  ! j in elec_beta_num+1..elec_alpha_num and k in 1..elec_alpha_num.
+  END_DOC
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo, tmp
+  PROVIDE mo_l_coef mo_r_coef
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      tmp = 0.d0
+      do j = elec_beta_num + 1, elec_alpha_num
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+          tmp = tmp + 0.5d0 * contrib_soo
+        enddo
+      enddo
+      fock_b_tmp2_bi_ortho(a,i) = tmp
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+  BEGIN_DOC
+  ! Pure same-spin contribution to F(a,i) coming from the orbital pair (j,k):
+  ! integral = - ( <akj|ikj> + <akj|jik> + <akj|kji> - <akj|jki> - <akj|ijk> - <akj|kij> )
+  END_DOC
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, c_3_int, c_minus_3_int
+  double precision              :: exch_13_int, exch_23_int, exch_12_int
+  double precision              :: tmp_pos, tmp_neg
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ! direct and cyclic permutations (positive terms)
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int   ) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int      ) ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int) ! < a k j | k j i >
+  tmp_pos = direct_int + c_3_int + c_minus_3_int
+  ! exchange permutations (negative terms)
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int  ) ! < a k j | j k i > : E_13
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int  ) ! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int  ) ! < a k j | k i j > : E_12
+  tmp_neg = - exch_13_int - exch_23_int - exch_12_int
+
+  integral = -(tmp_pos + tmp_neg)
+end
+
+! ---
+
+subroutine contrib_3e_soo(a,i,j,k,integral)
+
+  BEGIN_DOC
+  ! Same spin / opposite spin / opposite spin contribution to F(a,i) from the orbital pair (j,k):
+  ! integral = - ( <akj|ikj> - <akj|ijk> )
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int ) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int) ! < a k j | i j k > : E_23
+
+  integral = -(direct_int - exch_23_int)
+
+end
+
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! Same spin / opposite spin / same spin contribution to F(a,i) from the orbital pair (j,k):
+  ! integral = - ( <akj|ikj> - <akj|jki> )
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int ) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int) ! < a k j | j k i > : E_13
+
+  integral = -(direct_int - exch_13_int)
+
+end
+
+! ---
+
diff --git a/src/tc_scf/fock_three_utils.irp.f b/src/tc_scf/fock_three_utils.irp.f
new file mode 100644
index 00000000..5aec1d9e
--- /dev/null
+++ b/src/tc_scf/fock_three_utils.irp.f
@@ -0,0 +1,140 @@
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kk_sum, (n_points_final_grid,3)]
+ ! fock_3_w_kk_sum(ipoint,xi) = sum over k = 1, elec_beta_num of x_W_ij_erf_rk(ipoint,xi,k,k)
+ implicit none
+ integer :: k, xi, ipoint
+ fock_3_w_kk_sum = 0.d0
+ do k = 1, elec_beta_num
+  do xi = 1, 3
+   do ipoint = 1, n_points_final_grid
+    ! only the diagonal (k,k) elements enter this sum
+    fock_3_w_kk_sum(ipoint,xi) = fock_3_w_kk_sum(ipoint,xi) + x_W_ij_erf_rk(ipoint,xi,k,k)
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_w_ki_mos_k, (n_points_final_grid,3,mo_num)]
+ BEGIN_DOC
+ ! fock_3_w_ki_mos_k(ipoint,xi,i) = sum over k = 1, elec_beta_num of
+ !   x_W_ij_erf_rk(ipoint,xi,k,i) * mos_in_r_array(k,ipoint)
+ END_DOC
+ implicit none
+ integer :: i, k, xi, ipoint
+ fock_3_w_ki_mos_k = 0.d0
+ do i = 1, mo_num
+  do k = 1, elec_beta_num
+   do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+     fock_3_w_ki_mos_k(ipoint,xi,i) = fock_3_w_ki_mos_k(ipoint,xi,i) + x_W_ij_erf_rk(ipoint,xi,k,i) * mos_in_r_array(k,ipoint)
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_w_kl, (n_points_final_grid,3)]
+ BEGIN_DOC
+ ! fock_3_w_kl_w_kl(ipoint,xi) = sum over j, k = 1, elec_beta_num of
+ !   x_W_ij_erf_rk(ipoint,xi,k,j) squared
+ END_DOC
+ implicit none
+ integer :: j, k, xi, ipoint
+ fock_3_w_kl_w_kl = 0.d0
+ do j = 1, elec_beta_num
+  do k = 1, elec_beta_num
+   do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+     fock_3_w_kl_w_kl(ipoint,xi) = fock_3_w_kl_w_kl(ipoint,xi) + x_W_ij_erf_rk(ipoint,xi,k,j)**2
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_rho_beta, (n_points_final_grid)]
+ implicit none
+ integer :: k, ipoint
+ fock_3_rho_beta = 0.d0 ! accumulates, per grid point, the squares of the first elec_beta_num MO values
+ do ipoint = 1, n_points_final_grid
+  do k = 1, elec_beta_num
+   fock_3_rho_beta(ipoint) = fock_3_rho_beta(ipoint) + mos_in_r_array(k,ipoint)**2
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_mo_k_mo_l, (n_points_final_grid,3)]
+ BEGIN_DOC
+ ! fock_3_w_kl_mo_k_mo_l(ipoint,xi) = sum over k, l = 1, elec_beta_num of
+ !   x_W_ij_erf_rk(ipoint,xi,l,k) * mos_in_r_array_transp(ipoint,k) * mos_in_r_array_transp(ipoint,l)
+ END_DOC
+ implicit none
+ integer :: k, l, xi, ipoint
+ fock_3_w_kl_mo_k_mo_l = 0.d0
+ do k = 1, elec_beta_num
+  do l = 1, elec_beta_num
+   do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+     fock_3_w_kl_mo_k_mo_l(ipoint,xi) = fock_3_w_kl_mo_k_mo_l(ipoint,xi)                                   &
+       + x_W_ij_erf_rk(ipoint,xi,l,k) * mos_in_r_array_transp(ipoint,k) * mos_in_r_array_transp(ipoint,l)
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_w_ki_wk_a, (n_points_final_grid,3,mo_num, mo_num)]
+ BEGIN_DOC
+ ! fock_3_w_ki_wk_a(ipoint,mm,a,i) = sum over k = 1, elec_beta_num of x_W_ij_erf_rk(ipoint,mm,k,i) * x_W_ij_erf_rk(ipoint,mm,k,a)
+ END_DOC
+ implicit none
+ integer :: ipoint,i,a,k,mm
+ fock_3_w_ki_wk_a = 0.d0
+ do i = 1, mo_num
+  do a = 1, mo_num
+   do k = 1, elec_beta_num ! k sits outside the grid loops: stride-1 access, same k order for every element
+    do mm = 1, 3
+     do ipoint = 1, n_points_final_grid
+      fock_3_w_ki_wk_a(ipoint,mm,a,i) = fock_3_w_ki_wk_a(ipoint,mm,a,i) + x_W_ij_erf_rk(ipoint,mm,k,i) * x_W_ij_erf_rk(ipoint,mm,k,a)
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_trace_w_tilde, (n_points_final_grid,3)]
+ ! fock_3_trace_w_tilde(ipoint,xi) = sum over k = 1, elec_beta_num of fock_3_w_ki_wk_a(ipoint,xi,k,k)
+ implicit none
+ integer :: k, xi, ipoint
+ fock_3_trace_w_tilde = 0.d0
+ do k = 1, elec_beta_num
+  do xi = 1, 3
+   do ipoint = 1, n_points_final_grid
+    fock_3_trace_w_tilde(ipoint,xi) = fock_3_trace_w_tilde(ipoint,xi) + fock_3_w_ki_wk_a(ipoint,xi,k,k)
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, fock_3_w_kl_wla_phi_k, (n_points_final_grid,3,mo_num)]
+ BEGIN_DOC
+ ! (ipoint,xi,a) element = sum_{k,l <= elec_beta_num} x_W_ij_erf_rk(ipoint,xi,l,k) * x_W_ij_erf_rk(ipoint,xi,l,a) * MO k at ipoint
+ END_DOC
+ implicit none
+ integer :: a, k, l, xi, ipoint
+ fock_3_w_kl_wla_phi_k = 0.d0
+ do a = 1, mo_num
+  do k = 1, elec_beta_num
+   do l = 1, elec_beta_num
+    do xi = 1, 3
+     do ipoint = 1, n_points_final_grid
+      fock_3_w_kl_wla_phi_k(ipoint,xi,a) = fock_3_w_kl_wla_phi_k(ipoint,xi,a)                              &
+        + x_W_ij_erf_rk(ipoint,xi,l,k) * x_W_ij_erf_rk(ipoint,xi,l,a) * mos_in_r_array_transp(ipoint,k)
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+END_PROVIDER
+
diff --git a/src/tc_scf/integrals_in_r_stuff.irp.f b/src/tc_scf/integrals_in_r_stuff.irp.f
new file mode 100644
index 00000000..3ce85a97
--- /dev/null
+++ b/src/tc_scf/integrals_in_r_stuff.irp.f
@@ -0,0 +1,391 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, tc_scf_dm_in_r, (n_points_final_grid) ]
+  BEGIN_DOC
+  ! tc_scf_dm_in_r(ipoint) = sum over k = 1, elec_beta_num of mos_r_in_r_array(k,ipoint) * mos_l_in_r_array(k,ipoint)
+  END_DOC
+  implicit none
+  integer :: ipoint, k
+  tc_scf_dm_in_r = 0.d0
+  do ipoint = 1, n_points_final_grid
+    do k = 1, elec_beta_num
+      tc_scf_dm_in_r(ipoint) = tc_scf_dm_in_r(ipoint) + mos_r_in_r_array(k,ipoint) * mos_l_in_r_array(k,ipoint)
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, w_sum_in_r, (n_points_final_grid, 3)]
+  BEGIN_DOC
+  ! w_sum_in_r(ipoint,xi) = sum over k = 1, elec_beta_num of x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k)
+  ! (an earlier version read x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,k) instead; presumably the same values)
+  END_DOC
+  implicit none
+  integer :: k, xi, ipoint
+  w_sum_in_r = 0.d0
+  do k = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        w_sum_in_r(ipoint,xi) = w_sum_in_r(ipoint,xi) + x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k)
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ww_sum_in_r, (n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! ww_sum_in_r(ipoint,xi) = sum over k = 1, elec_beta_num of x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k) squared
+  END_DOC
+  implicit none
+  integer :: k, xi, ipoint
+  ww_sum_in_r = 0.d0
+  do k = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        ww_sum_in_r(ipoint,xi) = ww_sum_in_r(ipoint,xi) + x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k)**2
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_r_in_r, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! W1_r_in_r(ipoint,xi,i) = sum_{k <= elec_beta_num} mos_r_in_r_array_transp(ipoint,k) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,i)
+  END_DOC
+  implicit none
+  integer :: i, k, xi, ipoint
+
+  ! TODO: call lapack
+  W1_r_in_r = 0.d0
+  do i = 1, mo_num
+    do k = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_r_in_r(ipoint,xi,i) = W1_r_in_r(ipoint,xi,i) + mos_r_in_r_array_transp(ipoint,k) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_l_in_r, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! W1_l_in_r(ipoint,xi,i) = sum_{k <= elec_beta_num} mos_l_in_r_array_transp(ipoint,k) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,k)
+  END_DOC
+  implicit none
+  integer :: i, k, xi, ipoint
+
+  ! TODO: call lapack
+  W1_l_in_r = 0.d0
+  do i = 1, mo_num
+    do k = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_l_in_r(ipoint,xi,i) = W1_l_in_r(ipoint,xi,i) + mos_l_in_r_array_transp(ipoint,k) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,k)
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_in_r, (n_points_final_grid, 3)]
+  BEGIN_DOC
+  ! W1_in_r(ipoint,xi) = sum over k = 1, elec_beta_num of W1_l_in_r(ipoint,xi,k) * mos_r_in_r_array_transp(ipoint,k)
+  END_DOC
+  implicit none
+  integer :: k, xi, ipoint
+
+  ! TODO: call lapack
+  W1_in_r = 0.d0
+  do k = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        W1_in_r(ipoint,xi) = W1_in_r(ipoint,xi) + W1_l_in_r(ipoint,xi,k) * mos_r_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_diag_in_r, (n_points_final_grid, 3)]
+  BEGIN_DOC
+  ! W1_diag_in_r(ipoint,xi) = sum over k = 1, elec_beta_num of the right MO k times the left MO k times x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k)
+  END_DOC
+  implicit none
+  integer :: k, xi, ipoint
+  ! TODO: call lapack
+  W1_diag_in_r = 0.d0
+  do k = 1, elec_beta_num
+    do xi = 1, 3
+      do ipoint = 1, n_points_final_grid
+        W1_diag_in_r(ipoint,xi) = W1_diag_in_r(ipoint,xi)                                    &
+          + mos_r_in_r_array_transp(ipoint,k) * mos_l_in_r_array_transp(ipoint,k) * x_W_ki_bi_ortho_erf_rk_diag(ipoint,xi,k)
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_sum_in_r, (n_points_final_grid, 3)]
+  BEGIN_DOC
+  ! v_sum_in_r(ipoint,xi) = sum_{k,l <= elec_beta_num} x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,l) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,l,k)
+  END_DOC
+  implicit none
+  integer :: k, l, xi, ipoint
+  ! TODO: call lapack
+  v_sum_in_r = 0.d0
+  do k = 1, elec_beta_num
+    do l = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          v_sum_in_r(ipoint,xi) = v_sum_in_r(ipoint,xi) + x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,l) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,l,k)
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_W1_r_in_r, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! W1_W1_r_in_r(ipoint,xi,i) = sum_{k <= elec_beta_num} x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,i) * W1_r_in_r(ipoint,xi,k)
+  END_DOC
+  implicit none
+  integer :: i, k, xi, ipoint
+
+  ! TODO: call lapack
+  W1_W1_r_in_r = 0.d0
+  do i = 1, mo_num
+    do k = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_W1_r_in_r(ipoint,xi,i) = W1_W1_r_in_r(ipoint,xi,i) + x_W_ki_bi_ortho_erf_rk(ipoint,xi,k,i) * W1_r_in_r(ipoint,xi,k)
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, W1_W1_l_in_r, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! W1_W1_l_in_r(ipoint,xi,i) = sum_{k <= elec_beta_num} x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,k) * W1_l_in_r(ipoint,xi,k)
+  END_DOC
+  implicit none
+  integer :: i, k, xi, ipoint
+
+  ! TODO: call lapack
+  W1_W1_l_in_r = 0.d0
+  do i = 1, mo_num
+    do k = 1, elec_beta_num
+      do xi = 1, 3
+        do ipoint = 1, n_points_final_grid
+          W1_W1_l_in_r(ipoint,xi,i) = W1_W1_l_in_r(ipoint,xi,i) + x_W_ki_bi_ortho_erf_rk(ipoint,xi,i,k) * W1_l_in_r(ipoint,xi,k)
+        enddo
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+subroutine direct_term_imj_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | i m j > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp, tmp1, tmp2
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+      tmp    = w_sum_in_r(ipoint,xi)
+
+      ! term quadratic in w_sum_in_r
+      tmp1 = mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * tmp * tmp
+      ! term linear in w_sum_in_r, multiplied by tc_scf_dm_in_r and the (a,i) element of x_W_ki_bi_ortho_erf_rk
+      tmp2 = 2.d0 * tc_scf_dm_in_r(ipoint) * tmp * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i)
+
+      integral = integral + (tmp1 + tmp2) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine exch_term_jmi_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | j m i > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi, j
+  double precision              :: weight, tmp, tmp_w
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+      tmp_w  = w_sum_in_r(ipoint,xi)
+
+      ! contraction of the (a,j) and (j,i) elements over the first elec_beta_num orbitals
+      tmp = 0.d0
+      do j = 1, elec_beta_num
+        tmp = tmp + x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,j) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,j,i)
+      enddo
+
+      integral = integral + ( mos_l_in_r_array_transp(ipoint,a) * W1_r_in_r(ipoint,xi,i) * tmp_w &
+                            + tc_scf_dm_in_r(ipoint) * tmp                                        &
+                            + mos_r_in_r_array_transp(ipoint,i) * W1_l_in_r(ipoint,xi,a) * tmp_w ) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine exch_term_ijm_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | i j m > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp1, tmp2
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+
+      tmp1 = mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * v_sum_in_r(ipoint,xi)
+      tmp2 = 2.d0 * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i) * W1_in_r(ipoint,xi)
+
+      integral = integral + (tmp1 + tmp2) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine direct_term_ijj_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j = 1, elec_beta_num) < a j j | i j j > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp1, tmp2
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+      tmp1 = mos_l_in_r_array_transp(ipoint,a) * mos_r_in_r_array_transp(ipoint,i) * ww_sum_in_r(ipoint,xi)
+      tmp2 = 2.d0 * W1_diag_in_r(ipoint, xi) * x_W_ki_bi_ortho_erf_rk(ipoint,xi,a,i)
+
+      integral = integral + (tmp1 + tmp2) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine cyclic_term_jim_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | j i m > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp1, tmp2, tmp3
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+
+      tmp1 = mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i)
+      tmp2 = W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i)
+      tmp3 = W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i)
+
+      integral = integral + (tmp1 + tmp2 + tmp3) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
+subroutine cyclic_term_mji_bi_ortho(a, i, integral)
+
+  BEGIN_DOC
+  ! computes sum_(j,m = 1, elec_beta_num) < a m j | m j i > with bi ortho mos,
+  ! accumulated as a weighted sum over the grid points and the three components xi
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: i, a
+  double precision, intent(out) :: integral
+  integer                       :: ipoint, xi
+  double precision              :: weight, tmp1, tmp2, tmp3
+
+  integral = 0.d0
+  do xi = 1, 3
+    do ipoint = 1, n_points_final_grid
+      weight = final_weight_at_r_vector(ipoint)
+
+      tmp1 = mos_l_in_r_array_transp(ipoint,a) * W1_W1_r_in_r(ipoint,xi,i)
+      tmp2 = W1_l_in_r(ipoint,xi,a) * W1_r_in_r(ipoint,xi,i)
+      tmp3 = W1_W1_l_in_r(ipoint,xi,a) * mos_r_in_r_array_transp(ipoint,i)
+
+      integral = integral + (tmp1 + tmp2 + tmp3) * weight
+    enddo
+  enddo
+
+end
+
+! ---
+
diff --git a/src/tc_scf/minimize_tc_angles.irp.f b/src/tc_scf/minimize_tc_angles.irp.f
new file mode 100644
index 00000000..cb729eb2
--- /dev/null
+++ b/src/tc_scf/minimize_tc_angles.irp.f
@@ -0,0 +1,12 @@
+program print_angles
+ ! Overrides the grid settings, touches them, then runs minimize_tc_orb_angles.
+ implicit none
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ; my_n_pt_a_grid = 14 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!  call sort_by_tc_fock
+  call minimize_tc_orb_angles
+end
+
diff --git a/src/tc_scf/molden_lr_mos.irp.f b/src/tc_scf/molden_lr_mos.irp.f
new file mode 100644
index 00000000..735349ba
--- /dev/null
+++ b/src/tc_scf/molden_lr_mos.irp.f
@@ -0,0 +1,176 @@
+program molden
+  BEGIN_DOC
+! Overrides the grid settings (my_grid_becke, my_n_pt_r_grid, my_n_pt_a_grid), touches them and calls molden_lr
+  END_DOC
+
+  implicit none
+
+  print *, 'starting ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call molden_lr
+end
+subroutine molden_lr
+  implicit none
+  BEGIN_DOC
+  ! Produces a Molden file named <ezfio_filename>.mol; each MO index is written twice (mo_r_coef, then mo_l_coef)
+  END_DOC
+  character*(128)                :: output
+  integer                        :: i_unit_output,getUnitAndOpen
+  integer                        :: i,j,k,l
+  double precision, parameter :: a0 = 0.529177249d0 ! presumably the bohr -> angstrom factor ([Atoms] is written in Angs) - confirm
+
+  PROVIDE ezfio_filename
+
+  output=trim(ezfio_filename)//'.mol'
+  print*,'output = ',trim(output)
+
+  i_unit_output = getUnitAndOpen(output,'w')
+
+  write(i_unit_output,'(A)') '[Molden Format]'
+
+  write(i_unit_output,'(A)') '[Atoms] Angs'
+  do i = 1, nucl_num
+    write(i_unit_output,'(A2,2X,I4,2X,I4,3(2X,F15.10))')                   &
+        trim(element_name(int(nucl_charge(i)))),                     &
+        i,                                                           &
+        int(nucl_charge(i)),                                         &
+        nucl_coord(i,1)*a0, nucl_coord(i,2)*a0, nucl_coord(i,3)*a0
+  enddo
+
+  write(i_unit_output,'(A)') '[GTO]'
+
+  character*(1)                  :: character_shell
+  integer                        :: i_shell,i_prim,i_ao
+  integer                        :: iorder(ao_num)
+  integer                        :: nsort(ao_num)
+  ! write the shells of each nucleus; AOs reached from a shell's first AO with the same ao_l share a sort key
+  i_shell = 0
+  i_prim = 0
+  do i=1,nucl_num
+    write(i_unit_output,*) i, 0
+    do j=1,nucl_num_shell_aos(i)
+      i_shell +=1
+      i_ao = nucl_list_shell_aos(i,j)
+      character_shell = trim(ao_l_char(i_ao))
+      write(i_unit_output,*) character_shell, ao_prim_num(i_ao), '1.00'
+      do k = 1, ao_prim_num(i_ao)
+        i_prim +=1
+        write(i_unit_output,'(E20.10,2X,E20.10)') ao_expo(i_ao,k), ao_coef(i_ao,k)
+      enddo
+      l = i_ao
+      do while ( ao_l(l) == ao_l(i_ao) )
+        nsort(l) = i*10000 + j*100
+        l += 1
+        if (l > ao_num) exit
+      enddo
+    enddo
+    write(i_unit_output,*)''
+  enddo
+
+  ! add to each sort key an offset that depends on the powers of x, y, z of the AO
+  do i=1,ao_num
+    iorder(i) = i
+    ! p
+    if      ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 3
+    ! d
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 6
+    ! f
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 6
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 7
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 8
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 9
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 10
+    ! g
+    else if ((ao_power(i,1) == 4 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 1
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 4 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 2
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 4 )) then
+      nsort(i) += 3
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 4
+    else if ((ao_power(i,1) == 3 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 5
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 6
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 3 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 7
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 8
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 3 )) then
+      nsort(i) += 9
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 0 )) then
+      nsort(i) += 10
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 0 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 11
+    else if ((ao_power(i,1) == 0 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 12
+    else if ((ao_power(i,1) == 2 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 13
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 2 ).and.(ao_power(i,3) == 1 )) then
+      nsort(i) += 14
+    else if ((ao_power(i,1) == 1 ).and.(ao_power(i,2) == 1 ).and.(ao_power(i,3) == 2 )) then
+      nsort(i) += 15
+    endif
+  enddo
+  ! NOTE(review): isort presumably sorts nsort and applies the same permutation to iorder - confirm
+  call isort(nsort,iorder,ao_num)
+  write(i_unit_output,'(A)') '[MO]'
+  do i=1,mo_num
+    write (i_unit_output,*) 'Sym= 1'
+    write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i)
+    write (i_unit_output,*) 'Spin= Alpha'
+    write (i_unit_output,*) 'Occup=', mo_occ(i)
+    do j=1,ao_num
+      write(i_unit_output, '(I6,2X,E20.10)') j, mo_r_coef(iorder(j),i)
+    enddo
+    ! same header written again, this time followed by the mo_l_coef column of orbital i
+    write (i_unit_output,*) 'Sym= 1'
+    write (i_unit_output,*) 'Ene=', Fock_matrix_tc_mo_tot(i,i)
+    write (i_unit_output,*) 'Spin= Alpha'
+    write (i_unit_output,*) 'Occup=', mo_occ(i)
+    do j=1,ao_num
+      write(i_unit_output, '(I6,2X,E20.10)') j, mo_l_coef(iorder(j),i)
+    enddo
+  enddo
+  close(i_unit_output)
+end
+
diff --git a/src/tc_scf/print_angle_tc_orb.irp.f b/src/tc_scf/print_angle_tc_orb.irp.f
new file mode 100644
index 00000000..09260395
--- /dev/null
+++ b/src/tc_scf/print_angle_tc_orb.irp.f
@@ -0,0 +1,9 @@
+program print_angles
+ implicit none
+  my_grid_becke  = .True.
+!  my_n_pt_r_grid = 30 ; my_n_pt_a_grid = 50 ! larger grid, disabled here
+  my_n_pt_r_grid = 10 ! small grid for quick debug
+  my_n_pt_a_grid = 14 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call print_angles_tc ! runs with the grid settings overridden and touched above
+end
diff --git a/src/tc_scf/print_fit_param.irp.f b/src/tc_scf/print_fit_param.irp.f
new file mode 100644
index 00000000..f8bcfa7f
--- /dev/null
+++ b/src/tc_scf/print_fit_param.irp.f
@@ -0,0 +1,60 @@
+program print_fit_param
+
+  BEGIN_DOC
+! Overrides the grid settings (my_grid_becke, my_n_pt_r_grid, my_n_pt_a_grid), touches them and calls main
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  !call create_guess
+  !call orthonormalize_mos
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+  BEGIN_DOC
+  ! Sets mu_erf to 1, touches it, then prints for four fits the exponents and the (scaled) coefficients
+  END_DOC
+  implicit none
+  integer :: k
+
+  mu_erf = 1.d0
+  touch mu_erf
+
+  write(*,*) ' fit for (1 - erf(x))^2'
+  do k = 1, n_max_fit_slat
+    write(*,*) expo_gauss_1_erf_x_2(k), coef_gauss_1_erf_x_2(k)
+  enddo
+
+  write(*,*) ''
+  write(*,*) ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]'
+  do k = 1, n_max_fit_slat
+    write(*,*) expo_gauss_j_mu_x(k), 2.d0 * coef_gauss_j_mu_x(k)
+  enddo
+
+  write(*,*) ''
+  write(*,*) ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2'
+  do k = 1, n_max_fit_slat
+    write(*,*) expo_gauss_j_mu_x_2(k), 4.d0 * coef_gauss_j_mu_x_2(k)
+  enddo
+
+  write(*,*) ''
+  write(*,*) ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]'
+  do k = 1, n_max_fit_slat
+    write(*,*) expo_gauss_j_mu_1_erf(k), 4.d0 * coef_gauss_j_mu_1_erf(k)
+  enddo
+end subroutine main
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf.irp.f b/src/tc_scf/rh_tcscf.irp.f
new file mode 100644
index 00000000..0312df5f
--- /dev/null
+++ b/src/tc_scf/rh_tcscf.irp.f
@@ -0,0 +1,336 @@
+! ---
+
+subroutine rh_tcscf()
+
+  BEGIN_DOC
+  !
+  ! Roothaan-Hall algorithm for TC-SCF calculation
+  ! Loops until the DIIS error and the gradient are both below their thresholds, or qp_stop() is true; stops the program if n_it_TCSCF_max is exceeded
+  END_DOC
+
+  implicit none
+
+  integer                       :: i, j
+  integer                       :: iteration_TCSCF, dim_DIIS, index_dim_DIIS
+  double precision              :: energy_TCSCF, energy_TCSCF_1e, energy_TCSCF_2e, energy_TCSCF_3e, gradie_TCSCF
+  double precision              :: energy_TCSCF_previous, delta_energy_TCSCF
+  double precision              :: gradie_TCSCF_previous, delta_gradie_TCSCF
+  double precision              :: max_error_DIIS_TCSCF
+  double precision              :: level_shift_save
+  double precision              :: delta_energy_tmp, delta_gradie_tmp
+  double precision, allocatable :: F_DIIS(:,:,:), e_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+
+  !PROVIDE ao_md5 mo_occ
+  PROVIDE level_shift_TCSCF
+
+  allocate( mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num) &
+          , F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), e_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF) )
+
+  F_DIIS         = 0.d0
+  e_DIIS         = 0.d0
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  call write_time(6)
+
+  ! ---
+  ! Initialize energies and density matrices
+
+  energy_TCSCF_previous = TC_HF_energy
+  energy_TCSCF_1e       = TC_HF_one_e_energy
+  energy_TCSCF_2e       = TC_HF_two_e_energy
+  energy_TCSCF_3e       = 0.d0
+  if(three_body_h_tc) then
+    energy_TCSCF_3e     = diag_three_elem_hf
+  endif
+  gradie_TCSCF_previous = grad_non_hermit
+  delta_energy_TCSCF    = 1.d0
+  delta_gradie_TCSCF    = 1.d0
+  iteration_TCSCF       = 0
+  dim_DIIS              = 0
+  max_error_DIIS_TCSCF  = 1.d0
+
+  ! ---
+
+  ! Start of main SCF loop
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+
+  do while( (max_error_DIIS_TCSCF > threshold_DIIS_nonzero_TCSCF) .or. &
+            !(dabs(delta_energy_TCSCF) > thresh_TCSCF)             .or. &
+            (dabs(gradie_TCSCF_previous) > dsqrt(thresh_TCSCF))        )
+
+    iteration_TCSCF += 1
+    if(iteration_TCSCF > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! DIIS step: only for tcscf_algorithm == 'DIIS' and while the energy still changes by more than 1.d-6
+
+    if((tcscf_algorithm == 'DIIS') .and. (dabs(delta_energy_TCSCF) > 1.d-6))  then
+
+      ! store Fock and error matrices at each iteration
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          e_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao(i,j)
+        enddo
+      enddo
+
+      call extrapolate_TC_Fock_matrix(e_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), iteration_TCSCF, dim_DIIS)
+
+      Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
+      Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
+      !TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                            , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta , size(Fock_matrix_tc_ao_beta , 1) &
+                            , Fock_matrix_tc_mo_beta , size(Fock_matrix_tc_mo_beta , 1) )
+      TOUCH Fock_matrix_tc_mo_alpha Fock_matrix_tc_mo_beta
+    endif
+
+    ! copy fock_tc_leigvec_ao / fock_tc_reigvec_ao into the left / right MO coefficients
+
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+
+    ! calculate error vectors
+    max_error_DIIS_TCSCF = maxval(abs(FQS_SQF_mo))
+
+    ! ---
+
+    delta_energy_tmp = TC_HF_energy    - energy_TCSCF_previous
+    delta_gradie_tmp = grad_non_hermit - gradie_TCSCF_previous
+
+    ! while the gradient grew (after the 1st iteration): restore the saved orbitals and retry with a larger level shift
+
+    do while((delta_gradie_tmp > 1.d-7) .and. (iteration_TCSCF > 1))
+    !do while((dabs(delta_energy_tmp) > 0.5d0) .and. (iteration_TCSCF > 1))
+      print *, ' very big or bad step  : ', delta_energy_tmp, delta_gradie_tmp
+      print *, ' TC level shift = ', level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      TOUCH mo_l_coef mo_r_coef
+
+      delta_energy_tmp = TC_HF_energy    - energy_TCSCF_previous
+      delta_gradie_tmp = grad_non_hermit - gradie_TCSCF_previous
+      ! give up raising the shift once it exceeds the saved value by more than 40
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+!    print *, ' very big step  : ', delta_energy_tmp
+!    print *, ' TC level shift = ', level_shift_TCSCF
+
+    ! the level shift is reset to zero at the end of every iteration
+
+    level_shift_TCSCF = 0.d0
+    !level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    gradie_TCSCF       = grad_non_hermit
+    energy_TCSCF       = TC_HF_energy
+    energy_TCSCF_1e    = TC_HF_one_e_energy
+    energy_TCSCF_2e    = TC_HF_two_e_energy
+    energy_TCSCF_3e    = 0.d0
+    if(three_body_h_tc) then
+      energy_TCSCF_3e  = diag_three_elem_hf
+    endif
+    delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous
+    delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous
+
+    energy_TCSCF_previous = energy_TCSCF
+    gradie_TCSCF_previous = gradie_TCSCF
+
+
+    level_shift_save = level_shift_TCSCF
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
+
+    print *, ' iteration         = ', iteration_TCSCF
+    print *, ' total TC energy   = ', energy_TCSCF 
+    print *, ' 1-e   TC energy   = ', energy_TCSCF_1e
+    print *, ' 2-e   TC energy   = ', energy_TCSCF_2e
+    print *, ' 3-e   TC energy   = ', energy_TCSCF_3e
+    print *, ' |delta TC energy| = ', dabs(delta_energy_TCSCF)
+    print *, ' TC gradient       = ', gradie_TCSCF
+    print *, ' delta TC gradient = ', delta_gradie_TCSCF
+    print *, ' max TC DIIS error = ', max_error_DIIS_TCSCF 
+    print *, ' TC DIIS dim       = ', dim_DIIS
+    print *, ' TC level shift    = ', level_shift_TCSCF
+    print *, ' '
+
+    call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+    if(qp_stop()) exit
+  enddo
+
+  ! NOTE(review): reached on convergence but also when the loop is left through qp_stop()
+
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, e_DIIS)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(e_DIIS, F_DIIS, F_ao, size_F_ao, iteration_TCSCF, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  ! e_DIIS / F_DIIS : stored error / Fock matrices, F_ao : Fock matrix overwritten by the extrapolation
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1)
+  ! If the LU factorization or the condition estimate fails, or rcond < 1.d-14, dim_DIIS is set to 0 and F_ao is left unchanged
+  END_DOC
+  ! NOTE(review): this patch also adds a subroutine with the same name in rh_tcscf_diis.irp.f - confirm only one is meant to be compiled
+  implicit none
+
+  integer,          intent(in)    :: iteration_TCSCF, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: e_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1) )
+
+  ! Compute the matrix B (scalar products of the stored error matrices)
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(iteration_TCSCF-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(iteration_TCSCF-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + e_DIIS(k,l,i_DIIS) * e_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix (constraint row / column) and build the right-hand side C
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  ! scratch is allocated below with the size used for the LAPACK calls
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f
new file mode 100644
index 00000000..306c78b3
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_diis.irp.f
@@ -0,0 +1,362 @@
+! ---
+
+subroutine rh_tcscf_diis()
+  ! TC-SCF loop with DIIS extrapolation of the Fock matrix; the level shift is raised when the gradient grows by more than rate_th * g_delta_th
+  implicit none
+
+  integer                       :: i, j, it
+  integer                       :: dim_DIIS, index_dim_DIIS
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta
+  double precision              :: tc_grad, g_save, g_delta, g_delta_th
+  double precision              :: level_shift_save, rate_th
+  double precision              :: t0, t1
+  double precision              :: er_DIIS, er_delta, er_save, er_delta_th
+  double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  it          = 0
+  e_save      = 0.d0
+  dim_DIIS    = 0
+  g_delta_th  = 1d0
+  er_delta_th = 1d0
+  rate_th     = 100.d0 !0.01d0 !0.2d0
+
+  allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num))
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF))
+  F_DIIS = 0.d0
+  E_DIIS = 0.d0
+
+  call write_time(6)
+
+  ! ---
+
+  PROVIDE level_shift_TCSCF
+  PROVIDE mo_l_coef mo_r_coef
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = TC_HF_energy
+  etc_1e  = TC_HF_one_e_energy
+  etc_2e  = TC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  tc_grad = grad_non_hermit
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+
+  e_save  = etc_tot
+  g_save  = tc_grad
+  er_save = er_DIIS
+
+  call wall_time(t1)
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  ! main loop: runs while BOTH the gradient and the DIIS error are above their thresholds
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+
+  do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. threshold_DIIS_nonzero_TCSCF))
+
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! DIIS step, skipped once the energy change is <= 1.d-12
+
+    if(dabs(e_delta) > 1.d-12) then
+
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao           (i,j)
+        enddo
+      enddo
+
+      call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS)
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+    endif
+
+    ! copy fock_tc_leigvec_ao / fock_tc_reigvec_ao into the left / right MO coefficients
+
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! if the gradient jumped, fall back to the non-extrapolated Fock matrix stored in F_DIIS
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    !if((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) then
+    if((g_delta > rate_th * g_delta_th) .and. (it > 1)) then
+    !if((g_delta > 0.d0) .and. (it > 1)) then
+
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+    endif
+
+    ! ---
+
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+    ! NOTE(review): the orbitals saved here are the ones just installed above, not those of the previous iteration - confirm this is intended
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
+    !do while((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1))
+    do while((g_delta > rate_th * g_delta_th) .and. (it > 1))
+      print *, ' big or bad step : ', g_delta, rate_th * g_delta_th
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      g_delta  = grad_non_hermit         -  g_save
+      er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+
+    ! the level shift is halved at the end of every iteration
+
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad  = grad_non_hermit
+    er_DIIS  = maxval(abs(FQS_SQF_mo))
+    e_delta  = dabs(etc_tot - e_save)
+    g_delta  = tc_grad - g_save
+    er_delta = er_DIIS - er_save
+    
+    e_save           = etc_tot
+    g_save           = tc_grad
+    level_shift_save = level_shift_TCSCF
+    er_save          = er_DIIS
+
+    g_delta_th  = dabs(tc_grad) ! g_delta)
+    er_delta_th = dabs(er_DIIS) !er_delta)
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    ! energy and orbitals are written to EZFIO only when the gradient decreased
+    if(g_delta .lt. 0.d0) then
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+  ! NOTE(review): reached on convergence but also when the loop is left through qp_stop()
+
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS)
+
+  call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  ! E_DIIS / F_DIIS : stored error / Fock matrices, F_ao : Fock matrix overwritten by the extrapolation
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i = 1)
+  ! If the LU factorization or the condition estimate fails, or rcond < 1.d-14, dim_DIIS is set to 0 and F_ao is left unchanged
+  END_DOC
+  ! NOTE(review): this patch also adds a subroutine with the same name in rh_tcscf.irp.f - confirm only one is meant to be compiled
+  implicit none
+
+  integer,          intent(in)    :: it, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: E_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1) )
+
+  ! Compute the matrix B (scalar products of the stored error matrices)
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix (constraint row / column) and build the right-hand side C
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  ! scratch is allocated below with the size used for the LAPACK calls
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f
new file mode 100644
index 00000000..30798e3d
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_simple.irp.f
@@ -0,0 +1,129 @@
+! ---
+
+subroutine rh_tcscf_simple()
+  ! Plain TC-SCF iterations without extrapolation: the orbitals are replaced at each step until the gradient is below dsqrt(thresh_tcscf)
+  implicit none
+  integer                       :: it, dim_DIIS
+  double precision              :: t0, t1
+  double precision              :: e_save, e_delta
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad
+  double precision              :: er_DIIS
+
+  ! dim_DIIS is never changed here: it is only printed (as 0) in the iteration table
+
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  if(.not. bi_ortho) then
+   print *, ' grad_hermit = ', grad_hermit
+   call save_good_hermit_tc_eigvectors
+   TOUCH mo_coef
+   call save_mos
+  endif
+
+  ! ---
+
+  if(bi_ortho) then
+
+    PROVIDE level_shift_tcscf
+    PROVIDE mo_l_coef mo_r_coef
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+            , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+
+    ! first iteration (HF orbitals)
+    call wall_time(t0)
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad = grad_non_hermit
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    do while(tc_grad .gt. dsqrt(thresh_tcscf))
+      call wall_time(t0)
+
+      it += 1
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        stop
+      endif
+
+      mo_l_coef = fock_tc_leigvec_ao
+      mo_r_coef = fock_tc_reigvec_ao
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      etc_tot = TC_HF_energy
+      etc_1e  = TC_HF_one_e_energy
+      etc_2e  = TC_HF_two_e_energy
+      etc_3e  = 0.d0
+      if(three_body_h_tc) then
+        etc_3e = diag_three_elem_hf
+      endif
+      tc_grad = grad_non_hermit
+      er_DIIS = maxval(abs(FQS_SQF_mo))
+      e_delta = dabs(etc_tot - e_save)
+      e_save  = etc_tot
+
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+      call wall_time(t1)
+      write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+        it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    enddo
+
+  else
+
+   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) )
+      print*,'grad_hermit = ',grad_hermit
+      it += 1
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
+      call save_good_hermit_tc_eigvectors
+      TOUCH mo_coef
+      call save_mos
+    enddo
+
+  endif
+  ! NOTE(review): in the non-bi-ortho branch this message is also printed when the loop ends on n_it_tcscf_max
+  print *, ' TCSCF Simple converged !'
+  call print_energy_and_mos()
+
+
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f
new file mode 100644
index 00000000..fc4a7935
--- /dev/null
+++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f
@@ -0,0 +1,367 @@
+
+! ---
+
+program rotate_tcscf_orbitals
+
+  BEGIN_DOC
+  ! Sets the grid entities and bi_ortho = .True. (touching them), then calls maximize_overlap()
+  END_DOC
+
+  implicit none
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  bi_ortho = .True.
+  touch bi_ortho
+
+  call maximize_overlap()
+
+end
+
+! ---
+
+subroutine maximize_overlap()
+  ! Prints the L/C and C/R overlap matrices, calls rotate_degen_eigvec_to_maximize_overlap, prints them again and saves L, R to EZFIO
+  implicit none
+  integer                       :: i, m, n
+  ! n = ao_num, m = mo_num ; e holds the diagonal of Fock_matrix_tc_mo_tot
+  double precision, allocatable :: C(:,:), R(:,:), L(:,:), W(:,:), e(:)
+  double precision, allocatable :: S(:,:)
+
+  n = ao_num
+  m = mo_num
+
+  allocate(L(n,m), R(n,m), C(n,m), W(n,n), e(m))
+  L = mo_l_coef
+  R = mo_r_coef
+  C = mo_coef
+  W = ao_overlap
+
+  print*, ' fock matrix diag elements'
+  do i = 1, m
+    e(i) = Fock_matrix_tc_mo_tot(i,i)
+    print*, e(i)
+  enddo
+
+  ! ---
+
+  print *, ' overlap before :'
+  print *, ' '
+
+  allocate(S(m,m))
+
+  call LTxSxR(n, m, L, W, R, S)
+  !print*, " L.T x R"
+  !do i = 1, m
+  !  write(*, '(100(F16.10,1X))') S(i,i)
+  !enddo
+  call LTxSxR(n, m, L, W, C, S)
+  print*, " L.T x C"
+  do i = 1, m
+    write(*, '(100(F16.10,1X))') S(i,:)
+  enddo
+  call LTxSxR(n, m, C, W, R, S)
+  print*, " C.T x R"
+  do i = 1, m
+    write(*, '(100(F16.10,1X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! rotate L and R inside each degenerate set (C and W are only read)
+
+  call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R)
+
+  ! ---
+
+  print *, ' overlap after :'
+  print *, ' '
+
+  allocate(S(m,m))
+
+  call LTxSxR(n, m, L, W, R, S)
+  !print*, " L.T x R"
+  !do i = 1, m
+  !  write(*, '(100(F16.10,1X))') S(i,i)
+  !enddo
+  call LTxSxR(n, m, L, W, C, S)
+  print*, " L.T x C"
+  do i = 1, m
+    write(*, '(100(F16.10,1X))') S(i,:)
+  enddo
+  call LTxSxR(n, m, C, W, R, S)
+  print*, " C.T x R"
+  do i = 1, m
+    write(*, '(100(F16.10,1X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+  mo_l_coef = L
+  mo_r_coef = R
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+  ! ---
+
+  deallocate(L, R, C, W, e)
+
+end subroutine maximize_overlap
+
+! ---
+
+subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0)
+  ! For each set of degenerate e0 (|ei - ej| < thr_degen_tc), transforms the columns of L0 and R0 with the matrix T returned by maxovl for S = C.T x W0 x R
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: e0(m), W0(n,n), C0(n,m)
+  double precision, intent(inout) :: L0(n,m), R0(n,m)
+
+  ! NOTE(review): a degenerate set starting at i is taken as columns i..i+mm-1, i.e. degenerate e0 are assumed contiguous - confirm
+  integer                         :: i, j, k, kk, mm, id1, tot_deg
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:)
+  !double precision, allocatable   :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:)
+  double precision, allocatable   :: S(:,:), Snew(:,:), T(:,:), Ttmp(:,:), Stmp(:,:)
+  !real*8                          :: S(m,m), Snew(m,m), T(m,m)
+
+  id1 = 700
+  allocate(S(id1,id1), Snew(id1,id1), T(id1,id1))
+
+  ! ---
+
+  allocate( deg_num(m) )
+  do i = 1, m
+    deg_num(i) = 1
+  enddo
+
+  de_thr = thr_degen_tc
+
+  do i = 1, m-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, m
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  tot_deg = 0
+  do i = 1, m
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+      tot_deg = tot_deg + 1
+    endif
+  enddo
+
+  if(tot_deg .eq. 0) then
+    print *, ' no degen'
+    return
+  endif
+
+  ! ---
+
+  do i = 1, m
+    mm = deg_num(i)
+
+    if(mm .gt. 1) then
+      if(mm .gt. id1) stop ' rotate_degen_eigvec_to_maximize_overlap: degenerate set larger than the id1 x id1 buffers'
+      allocate(L(n,mm), R(n,mm), C(n,mm))
+      do j = 1, mm
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      ! C.T x W0 x R
+      allocate(tmp(mm,n), Stmp(mm,mm))
+      call dgemm( 'T', 'N', mm, n, n, 1.d0       &
+                , C, size(C, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', mm, mm, n, 1.d0        &
+                , tmp, size(tmp, 1), R, size(R, 1) &
+                , 0.d0, Stmp, size(Stmp, 1) )
+      deallocate(C, tmp)
+
+      S = 0.d0
+      do k = 1, mm
+        do kk = 1, mm
+          S(kk,k) = Stmp(kk,k)
+        enddo
+      enddo
+      deallocate(Stmp)
+
+      !print*, " overlap bef"
+      !do k = 1, mm
+      !  write(*, '(100(F16.10,X))') (S(k,kk), kk=1, mm)
+      !enddo
+
+      T    = 0.d0
+      Snew = 0.d0
+      call maxovl(mm, mm, S, T, Snew)
+
+      !print*, " overlap aft"
+      !do k = 1, mm
+      !  write(*, '(100(F16.10,X))') (Snew(k,kk), kk=1, mm)
+      !enddo
+
+      allocate(Ttmp(mm,mm))
+      Ttmp(1:mm,1:mm) = T(1:mm,1:mm)
+
+      allocate(Lnew(n,mm), Rnew(n,mm))
+      call dgemm( 'N', 'N', n, mm, mm, 1.d0               &
+                , R, size(R, 1), Ttmp(1,1), size(Ttmp, 1) &
+                , 0.d0, Rnew, size(Rnew, 1) )
+      call dgemm( 'N', 'N', n, mm, mm, 1.d0               &
+                , L, size(L, 1), Ttmp(1,1), size(Ttmp, 1) &
+                , 0.d0, Lnew, size(Lnew, 1) )
+
+      deallocate(L, R)
+      deallocate(Ttmp)
+
+      ! ---
+
+      do j = 1, mm
+        L0(1:n,i+j-1) = Lnew(1:n,j)
+        R0(1:n,i+j-1) = Rnew(1:n,j)
+      enddo
+      deallocate(Lnew, Rnew)
+
+    endif
+  enddo
+
+  deallocate(S, Snew, T)
+
+end subroutine rotate_degen_eigvec_to_maximize_overlap
+
+! ---
+
+subroutine fix_right_to_one()
+  ! For each degenerate set of Fock diagonal elements, calls impose_weighted_orthog_svd on R and impose_weighted_biorthog_qr on (R, L), then saves the orbitals to EZFIO
+  implicit none
+  integer                       :: i, j, m, n, mm, tot_deg
+  double precision              :: accu_d, accu_nd
+  double precision              :: de_thr, ei, ej, de
+  integer,          allocatable :: deg_num(:)
+  double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:)
+  double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:)
+
+  n = ao_num
+  m = mo_num
+
+  allocate(L0(n,m), R0(n,m), W(n,n), e0(m))
+  L0 = mo_l_coef
+  R0 = mo_r_coef
+  W  = ao_overlap
+
+  print*, ' fock matrix diag elements'
+  do i = 1, m
+    e0(i) = Fock_matrix_tc_mo_tot(i,i)
+    print*, e0(i)
+  enddo
+
+  ! deg_num(i) = size of the degenerate set starting at i (0 if i already belongs to an earlier set)
+
+  allocate( deg_num(m) )
+  do i = 1, m
+    deg_num(i) = 1
+  enddo
+  ! NOTE(review): hard-coded threshold, whereas rotate_degen_eigvec_to_maximize_overlap uses thr_degen_tc - confirm
+  de_thr = 1d-6
+
+  do i = 1, m-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, m
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  deallocate(e0)
+
+  tot_deg = 0
+  do i = 1, m
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+      tot_deg = tot_deg + 1
+    endif
+  enddo
+
+  if(tot_deg .eq. 0) then
+    print *, ' no degen'
+    return
+  endif
+
+  ! a degenerate set starting at i is taken as the columns i..i+mm-1 of L0 / R0
+
+  do i = 1, m
+    mm = deg_num(i)
+
+    if(mm .gt. 1) then
+
+      allocate(L(n,mm), R(n,mm))
+      do j = 1, mm
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_weighted_orthog_svd(n, mm, W, R)
+      call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L)
+
+      ! ---
+
+      do j = 1, mm
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+      deallocate(L, R)
+
+    endif
+  enddo
+
+  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.)
+
+  deallocate(W, deg_num)
+
+  mo_l_coef = L0
+  mo_r_coef = R0
+  deallocate(L0, R0)
+
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+  print *, ' orbitals are rotated '
+
+  return
+end subroutine fix_right_to_one
+
+! ---
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
new file mode 100644
index 00000000..596ae500
--- /dev/null
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -0,0 +1,359 @@
+
+! ---
+
+subroutine minimize_tc_orb_angles()
+  ! Call routine_save_rotated_mos until it reports good_angles. The degeneracy
+  ! threshold starts at thr_degen_tc and is multiplied by 10 after each call; the
+  ! program stops with a non-zero status after 100 unsuccessful attempts.
+  implicit none
+  logical          :: good_angles
+  integer          :: i
+  double precision :: thr_deg
+
+  good_angles = .False.
+  thr_deg = thr_degen_tc
+  call print_energy_and_mos()
+
+  print *, ' Minimizing the angles between the TC orbitals'
+  i = 1
+  do while (.not. good_angles)
+    print *, ' iteration = ', i
+    call routine_save_rotated_mos(thr_deg, good_angles)
+    thr_deg *= 10.d0
+    i += 1
+    ! give up only if the attempt that just finished did not succeed
+    if((.not. good_angles) .and. (i .gt. 100)) then
+      print *, ' minimize_tc_orb_angles does not seem to converge ..'
+      print *, ' Something is weird in the tc orbitals ...'
+      print *, ' STOPPING'
+      stop 1
+    endif
+  enddo
+  print *, ' Converged ANGLES MINIMIZATION !!'
+  call print_angles_tc()
+  call print_energy_and_mos()
+end
+
+! ---
+
+subroutine routine_save_rotated_mos(thr_deg, good_angles)
+  ! Rebuild the left/right MOs inside each group returned by give_degen_full_list, store them (memory + EZFIO), set good_angles = (max |angle_left_right| < 45).
+  implicit none
+
+  double precision, intent(in)  :: thr_deg
+  logical,          intent(out) :: good_angles
+
+  integer                       :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst
+  double precision              :: max_angle, norm
+  integer,          allocatable :: list_degen(:,:)
+  double precision, allocatable :: new_angles(:)
+  double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:)
+  double precision, allocatable :: mo_r_coef_new(:,:)
+  double precision, allocatable :: fock_diag(:),s_mat(:,:)
+  double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:)
+  double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:)
+
+  good_angles = .False.
+
+  allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num))
+
+  print *, ' ***************************************'
+  print *, ' ***************************************'
+  print *, ' THRESHOLD FOR DEGENERACIES ::: ', thr_deg
+  print *, ' ***************************************'
+  print *, ' ***************************************'
+  print *, ' Starting with the following TC energy gradient :', grad_non_hermit
+
+  mo_r_coef_good = mo_r_coef
+  mo_l_coef_good = mo_l_coef
+
+  allocate(mo_r_coef_new(ao_num, mo_num))
+  mo_r_coef_new = mo_r_coef
+  do i = 1, mo_num
+    norm = 1.d0/dsqrt(overlap_mo_r(i,i))
+    do j = 1, ao_num
+      mo_r_coef_new(j,i) *= norm
+    enddo
+  enddo
+
+  allocate(list_degen(mo_num,0:mo_num), s_mat(mo_num,mo_num), fock_diag(mo_num))
+  do i = 1, mo_num
+    fock_diag(i) = Fock_matrix_tc_mo_tot(i,i)
+  enddo
+
+ ! overlap between the rescaled right functions (both arguments are mo_r_coef_new); s_mat is not used below
+  call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat)
+! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list)
+  call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list)
+  print *, ' fock_matrix_mo'
+  do i = 1, mo_num
+    print *, i, fock_diag(i), angle_left_right(i)
+  enddo
+   
+  do i = 1, n_degen_list
+!  ifirst = list_degen(1,i)
+!  ilast  = list_degen(2,i)
+!  n_degen = ilast - ifirst +1
+
+    n_degen = list_degen(i,0)
+    if(n_degen .eq. 1) cycle
+
+    allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen))
+    allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen))
+    allocate(T(n_degen,n_degen), Snew(n_degen,n_degen))
+
+    do j = 1, n_degen
+      mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j))
+      mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j))
+    enddo
+    ! Orthogonalization of right functions
+    print *, ' Orthogonalization of RIGHT functions'
+    print *, ' ------------------------------------'
+    call orthog_functions(ao_num, n_degen, mo_r_coef_tmp, ao_overlap)
+  
+    ! Orthogonalization of left functions
+    print *, ' Orthogonalization of LEFT functions'
+    print *, ' ------------------------------------'
+    call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
+
+    print *, ' Overlap left-right '
+    call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
+    do j = 1, n_degen
+     write(*,'(100(F8.4,X))') stmp(:,j)
+    enddo
+    call build_s_matrix(ao_num, n_degen, mo_l_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
+    ! NOTE(review): stmp is overwritten here and again below, so maxovl later receives the right/right overlap, not the left/right one printed above -- confirm this is intended
+    !print*,'LEFT/LEFT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'RIGHT/RIGHT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+
+    if(maxovl_tc) then
+      T    = 0.d0
+      Snew = 0.d0
+      call maxovl(n_degen, n_degen, stmp, T, Snew)
+    !print*,'overlap after'
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')Snew(:,j)
+    !enddo
+      call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0                  &
+                , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) &
+                , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) )
+     call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'Overlap test'
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    else 
+      mo_l_coef_new = mo_l_coef_tmp
+    endif
+
+    call impose_weighted_biorthog_svd(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp)
+
+    !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'LAST OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_l_coef_new, ao_overlap, stmp)
+    !print*,'LEFT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    !call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp)
+    !print*,'RIGHT OVERLAP '
+    !do j = 1, n_degen
+    ! write(*,'(100(F16.10,X))')stmp(:,j)
+    !enddo
+    do j = 1, n_degen
+!!!   mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j)
+!!!   mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j)
+      mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j)
+      mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j)
+    enddo
+
+    deallocate(stmp, smat2)
+    deallocate(mo_r_coef_tmp, mo_l_coef_tmp, mo_l_coef_new)
+    deallocate(T, Snew)
+  enddo
+
+  !allocate(stmp(mo_num, mo_num))
+  !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_r_coef_good, ao_overlap, stmp)
+  !print*,'LEFT/RIGHT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+  !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_l_coef_good, ao_overlap, stmp)
+  !print*,'LEFT/LEFT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+  !call build_s_matrix(ao_num, mo_num, mo_r_coef_good, mo_r_coef_good, ao_overlap, stmp)
+  !print*,'RIGHT/RIGHT OVERLAP '
+  !do j = 1, mo_num
+  ! write(*,'(100(F16.10,X))')stmp(:,j)
+  !enddo
+
+  mo_r_coef = mo_r_coef_good
+  mo_l_coef = mo_l_coef_good
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+  TOUCH mo_l_coef mo_r_coef
+
+  allocate(new_angles(mo_num))
+  new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num))
+  max_angle = maxval(new_angles)
+  good_angles = max_angle.lt.45.d0
+  print *, ' max_angle = ', max_angle
+  
+end
+
+! ---
+
+subroutine build_s_matrix(m, n, C1, C2, overlap, smat)
+
+  ! Form smat = C1^T . overlap . C2 with two successive dgemm calls.
+  !   m       : number of rows of C1 and C2, order of overlap
+  !   n       : number of columns of C1 and C2, order of smat
+  !   C1, C2  : (m,n) coefficient matrices
+  !   overlap : (m,m) metric matrix
+  !   smat    : (n,n) result
+
+  implicit none
+
+  integer,          intent(in)  :: m, n
+  double precision, intent(in)  :: C1(m,n), C2(m,n), overlap(m,m)
+  double precision, intent(out) :: smat(n,n)
+
+  double precision, allocatable :: half_transf(:,:)
+
+  allocate(half_transf(n,m))
+  smat = 0.d0
+
+  ! half_transf = C1^T . overlap
+  call dgemm( 'T', 'N', n, m, m, 1.d0, C1, m, overlap, m &
+            , 0.d0, half_transf, n )
+
+  ! smat = half_transf . C2
+  call dgemm( 'N', 'N', n, n, m, 1.d0, half_transf, n, C2, m &
+            , 0.d0, smat, n )
+
+  deallocate(half_transf)
+
+  return
+
+end
+
+! ---
+
+subroutine orthog_functions(m, n, coef, overlap)
+
+  ! Orthogonalize the n column vectors of coef with impose_orthog_svd_overlap, then
+  ! rescale column j by 1/sqrt(s_jj), s_jj = coef(:,j)^T . overlap . coef(:,j).
+  !   m       : number of rows of coef, order of overlap
+  !   n       : number of vectors (columns of coef)
+  !   coef    : (m,n) vectors, modified in place
+  !   overlap : (m,m) metric matrix
+
+  implicit none
+
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: overlap(m,m)
+  double precision, intent(inout) :: coef(m,n)
+
+  integer                         :: j, k
+  double precision                :: fac
+  double precision, allocatable   :: stmp(:,:)
+
+  call impose_orthog_svd_overlap(m, n, coef, overlap)
+
+  ! overlap of the orthogonalized vectors; only its diagonal is read below
+  allocate(stmp(n,n))
+  call build_s_matrix(m, n, coef, coef, overlap, stmp)
+
+  do j = 1, n
+    ! TODO: Manu, check this normalization
+    fac = 1.d0/dsqrt(stmp(j,j))
+    do k = 1, m
+      coef(k,j) *= fac
+    enddo
+  enddo
+
+  deallocate(stmp)
+
+  return
+end
+
+! ---
+
+subroutine print_angles_tc()
+
+  ! For every MO, print overlap_mo_l(i,i)*overlap_mo_r(i,i) followed by angle_left_right(i).
+
+  implicit none
+  integer          :: imo
+  double precision :: norm_prod
+
+  print *, ' product of norms, angle between vectors'
+  do imo = 1, mo_num
+    norm_prod = overlap_mo_l(imo,imo) * overlap_mo_r(imo,imo)
+    print *, norm_prod, angle_left_right(imo)
+  enddo
+
+end
+
+! ---
+
+subroutine print_energy_and_mos()
+  ! Print TC_HF_energy, grad_non_hermit and max_angle_left_right, a diagnostic on the
+  ! maximum left/right angle, then one line per MO (Fock diagonal, norm product, angle).
+  implicit none
+  integer :: i
+
+  print *, ' '
+  print *, ' TC energy = ', TC_HF_energy
+  print *, ' TC SCF energy gradient = ', grad_non_hermit
+  print *, ' Max angle Left/right   = ', max_angle_left_right
+  ! the three branches cover every value, including exactly 45 and 75 degrees
+  if(max_angle_left_right .lt. 45.d0) then
+    print *, ' Maximum angle BELOW 45 degrees, everything is OK !'
+  else if(max_angle_left_right .lt. 75.d0) then
+    print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...'
+  else
+    print *, ' Maximum angle ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...'
+  endif
+
+  print *, ' Diag Fock elem, product of left/right norm, angle left/right '
+  do i = 1, mo_num
+    write(*, '(I3,X,100(F16.10,X))') i, Fock_matrix_tc_mo_tot(i,i), overlap_mo_l(i,i)*overlap_mo_r(i,i), angle_left_right(i)
+  enddo
+end
+
+! ---
+
+subroutine sort_by_tc_fock
+ ! Reorder the columns of mo_l_coef and mo_r_coef with the index array iorder returned by dsort (presumably sorting the Fock diagonal; verify dsort).
+ implicit none
+ integer :: i
+ integer, allocatable :: iorder(:)
+ double precision, allocatable :: mo_l_tmp(:,:), mo_r_tmp(:,:),fock(:)
+ allocate(iorder(mo_num),fock(mo_num),mo_l_tmp(ao_num, mo_num),mo_r_tmp(ao_num,mo_num))
+ mo_l_tmp = mo_l_coef
+ mo_r_tmp = mo_r_coef
+ do i = 1, mo_num
+  iorder(i) = i
+  fock(i) = Fock_matrix_tc_mo_tot(i,i)
+ enddo
+ call dsort(fock,iorder,mo_num)
+ do i = 1, mo_num
+  mo_l_coef(1:ao_num,i) = mo_l_tmp(1:ao_num,iorder(i))
+  mo_r_coef(1:ao_num,i) = mo_r_tmp(1:ao_num,iorder(i))
+ enddo
+ touch mo_l_coef mo_r_coef
+end
+
diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f
new file mode 100644
index 00000000..d3722098
--- /dev/null
+++ b/src/tc_scf/tc_petermann_factor.irp.f
@@ -0,0 +1,78 @@
+
+! ---
+
+program tc_petermann_factor
+
+  BEGIN_DOC
+  ! Set the Becke grid parameters, then call main, which prints the left, right and Petermann matrices.
+  END_DOC
+
+  implicit none
+  ! grid settings are assigned and TOUCHed before main runs
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  ! Print Sl = mo_l_coef^T . mo_l_coef, Sr = mo_r_coef^T . mo_r_coef, their element-wise
+  ! product Pf (labelled Petermann matrix) and the mean of the diagonal of Pf.
+  ! NOTE(review): the AO overlap matrix does not enter Sl and Sr -- confirm this is intended.
+
+  implicit none
+  integer                       :: icol
+  double precision              :: Pf_diag_av
+  double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:)
+
+  allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num))
+
+  ! Sl = mo_l_coef^T . mo_l_coef
+  call dgemm( 'T', 'N', mo_num, mo_num, ao_num, 1.d0, mo_l_coef, size(mo_l_coef, 1) &
+            , mo_l_coef, size(mo_l_coef, 1), 0.d0, Sl, size(Sl, 1) )
+  ! Sr = mo_r_coef^T . mo_r_coef
+  call dgemm( 'T', 'N', mo_num, mo_num, ao_num, 1.d0, mo_r_coef, size(mo_r_coef, 1) &
+            , mo_r_coef, size(mo_r_coef, 1), 0.d0, Sr, size(Sr, 1) )
+
+  print *, ''
+  print *, ' left-orthog matrix:'
+  do icol = 1, mo_num
+    write(*,'(100(F8.4,X))') Sl(:,icol)
+  enddo
+
+  print *, ''
+  print *, ' right-orthog matrix:'
+  do icol = 1, mo_num
+    write(*,'(100(F8.4,X))') Sr(:,icol)
+  enddo
+
+  ! element-wise product of the two matrices
+  Pf = Sl * Sr
+
+  print *, ''
+  print *, ' Petermann matrix:'
+  do icol = 1, mo_num
+    write(*,'(100(F8.4,X))') Pf(:,icol)
+  enddo
+
+  Pf_diag_av = 0.d0
+  do icol = 1, mo_num
+    Pf_diag_av = Pf_diag_av + Pf(icol,icol)
+  enddo
+  Pf_diag_av = Pf_diag_av / dble(mo_num)
+
+  print *, ''
+  print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av
+  deallocate(Sl, Sr, Pf)
+end subroutine
+
+! ---
+
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
new file mode 100644
index 00000000..deaf8d82
--- /dev/null
+++ b/src/tc_scf/tc_scf.irp.f
@@ -0,0 +1,75 @@
+! ---
+
+program tc_scf
+
+  BEGIN_DOC
+  ! Run the TC-SCF selected by tcscf_algorithm ('DIIS' or 'Simple'), then call minimize_tc_orb_angles and print the energy and MOs.
+  END_DOC
+
+  implicit none
+
+  print *, 'starting ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  !call create_guess()
+  !call orthonormalize_mos()
+
+  PROVIDE tcscf_algorithm
+  if(tcscf_algorithm == 'DIIS') then
+    call rh_tcscf_diis()
+  elseif(tcscf_algorithm == 'Simple') then
+    call rh_tcscf_simple()
+  else
+    print *, ' not implemented yet', tcscf_algorithm
+    stop 1
+  endif
+
+  call minimize_tc_orb_angles()
+  call print_energy_and_mos()
+
+end
+
+! ---
+
+subroutine create_guess()
+  ! Build guess MOs according to mo_guess_type ("HCore" or "Huckel"); exists is forced to .false., so the guess is always rebuilt.
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
+
+  if(.not.exists) then
+    mo_label = 'Guess'
+    if(mo_guess_type == "HCore") then
+      mo_coef = ao_ortho_lowdin_coef
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+      TOUCH mo_coef
+      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.)
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+      SOFT_TOUCH mo_coef
+    elseif (mo_guess_type == "Huckel") then
+      call huckel_guess
+    else
+      print *,  'Unrecognized MO guess type : '//mo_guess_type
+      stop 1
+    endif
+    SOFT_TOUCH mo_label
+  endif
+
+end subroutine create_guess
+
+! ---
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
new file mode 100644
index 00000000..90719f47
--- /dev/null
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -0,0 +1,37 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
+  ! Beta AO density matrix: TCSCF_bi_ort_dm_ao_beta when bi_ortho is set, SCF_density_matrix_ao_beta otherwise.
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
+  ! Alpha AO density matrix: TCSCF_bi_ort_dm_ao_alpha when bi_ortho is set, SCF_density_matrix_ao_alpha otherwise.
+  implicit none
+
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
+END_PROVIDER 
+
+
+! ---
+
+BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
+  ! Sum of TCSCF_density_matrix_ao_beta and TCSCF_density_matrix_ao_alpha.
+  implicit none
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+END_PROVIDER 
+
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
new file mode 100644
index 00000000..611b8b4c
--- /dev/null
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -0,0 +1,34 @@
+
+ BEGIN_PROVIDER [ double precision, TC_HF_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
+
+  BEGIN_DOC
+  ! TC Hartree-Fock energy containing the nuclear repulsion, and its one- and two-body components.
+  END_DOC
+
+  implicit none
+  integer :: i, j
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  TC_HF_energy = nuclear_repulsion
+  TC_HF_one_e_energy = 0.d0
+  TC_HF_two_e_energy = 0.d0
+  ! contract the AO integrals element by element with the alpha and beta AO density matrices
+  do j = 1, ao_num
+    do i = 1, ao_num
+      TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+    enddo
+  enddo
+  ! total = nuclear repulsion + one-body + two-body + diag_three_elem_hf
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
+  TC_HF_energy += diag_three_elem_hf
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f
new file mode 100644
index 00000000..dde477c4
--- /dev/null
+++ b/src/tc_scf/tc_scf_utils.irp.f
@@ -0,0 +1,43 @@
+
+! ---
+
+subroutine LTxSxR(n, m, L, S, R, C)
+  ! C = L^T . S . R (two dgemm calls); also prints accu_d, the sum of |C(i,i)|,
+  ! and accu_nd, the square root of the sum of the squared off-diagonal elements of C.
+
+  implicit none
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: L(n,m), S(n,n), R(n,m)
+  double precision, intent(out) :: C(m,m)
+  integer                       :: i, j
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: tmp(:,:)
+
+  ! tmp = L^T . S, then C = tmp . R
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , L, size(L, 1), S, size(S, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), R, size(R, 1) &
+            , 0.d0, C, size(C, 1) )
+  deallocate(tmp)
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(j.eq.i) then
+        accu_d += dabs(C(j,i))
+      else
+        accu_nd += C(j,i) * C(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  print*, ' accu_d  = ', accu_d
+  print*, ' accu_nd = ', accu_nd
+end subroutine LTxSxR
+
+! ---
+
diff --git a/src/tc_scf/test_Ne.sh b/src/tc_scf/test_Ne.sh
new file mode 100755
index 00000000..27ea73c2
--- /dev/null
+++ b/src/tc_scf/test_Ne.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+source "${QP_ROOT:=/home/eginer/new_qp2/qp2}/quantum_package.rc"  # keep a QP_ROOT already set by the caller
+echo Ne > Ne.xyz
+echo "$QP_ROOT"
+qp create_ezfio -b cc-pcvdz Ne.xyz
+qp run scf
+qp set tc_keywords bi_ortho True
+qp set ao_two_e_erf_ints mu_erf 0.87
+qp set tc_keywords j1b_pen "[1.5]"  # quoted: an unquoted [1.5] is a glob pattern
+qp set tc_keywords j1b_type 3
+qp run tc_scf | tee Ne.ezfio.tc_scf.out
+grep "TC energy =" Ne.ezfio.tc_scf.out | tail -1
+eref=-128.552134  # presumably the reference for the "TC energy =" value printed above; not compared automatically
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..a14c4126
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,1003 @@
+program test_ints
+
+  BEGIN_DOC
+! Driver for the integral tests of this file; only test_two_e_tc_non_hermit_integral is currently enabled.
+  END_DOC
+
+  implicit none
+
+  print *, ' starting test_ints ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  my_extra_grid_becke = .True.
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
+! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+! call test_total_grad_square
+! call test_ao_tc_int_chemist
+! call test_grid_points_ao
+! call test_tc_scf
+ !call test_int_gauss
+
+  !call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_mo()
+
+  !call test_tc_grad_and_lapl_ao()
+  !call test_tc_grad_square_ao()
+
+  call test_two_e_tc_non_hermit_integral()
+
+end
+
+! ---
+
+subroutine test_tc_scf
+ ! Force the evaluation of the x_v_ij_erf_rk_cst_mu_j1b_test provider.
+ implicit none
+ provide x_v_ij_erf_rk_cst_mu_j1b_test
+ ! other checks that can be enabled here:
+! provide int2_u_grad1u_x_j1b2_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
+subroutine test_ao_tc_int_chemist
+ ! Force the evaluation of the ao_tc_int_chemist provider.
+ implicit none
+ provide ao_tc_int_chemist
+ ! providers that can be forced here instead:
+ !   ao_tc_int_chemist_test, tc_grad_square_ao_test, tc_grad_and_lapl_ao_test
+end
+
+! ---
+
+subroutine routine_test_j1b
+ ! Dump the List_all_comb_b3_* entries with |coef| > 1.d-10, then every List_comb_thr_b3_* entry of each AO pair.
+ implicit none
+ integer :: icomb, n_kept, iao, jao
+
+ n_kept = 0
+ do icomb = 1, List_all_comb_b3_size
+  if(.not.(dabs(List_all_comb_b3_coef(icomb)).gt.1.d-10)) cycle
+  print*,''
+  print*,List_all_comb_b3_expo(icomb),List_all_comb_b3_coef(icomb)
+  print*,List_all_comb_b3_cent(1:3,icomb)
+  print*,''
+  n_kept += 1
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,n_kept
+
+ do iao = 1, ao_num
+  do jao = 1, ao_num
+   do icomb = 1, List_comb_thr_b3_size(jao,iao)
+    print*,'',jao,iao
+    print*,List_comb_thr_b3_expo(icomb,jao,iao),List_comb_thr_b3_coef(icomb,jao,iao)
+    print*,List_comb_thr_b3_cent(1:3,icomb,jao,iao)
+    print*,''
+   enddo
+  enddo
+ enddo
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ ! Grid-contract int2_u_grad1u_j1b2_test (array) and int2_u_grad1u_j1b2 (array_ref) with two AOs, then print the summed abs/relative differences over ao_num**4.
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+ ! Grid-contract v_ij_erf_rk_cst_mu_j1b_test (array) and v_ij_erf_rk_cst_mu_j1b (array_ref) with two AOs,
+ ! then print the summed absolute and relative (|ref| > 1.d-10 only) differences divided by ao_num**4.
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+ ! Grid-contract x_v_ij_erf_rk_cst_mu_j1b_test (array) and x_v_ij_erf_rk_cst_mu_j1b (array_ref), summed over m = 1..3,
+ ! with aos_grad_in_r_array_transp(m,k) * aos_in_r_array(l) * weight, then print the abs/relative differences over ao_num**4.
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b_test
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+ ! Grid-contract v_ij_u_cst_mu_j1b_test (array) and v_ij_u_cst_mu_j1b (array_ref) with two AOs,
+ ! then print the summed absolute and relative (|ref| > 1.d-10 only) differences divided by ao_num**4.
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ ! Contract int2_grad1u2_grad2u2_j1b2_test (array) and the reference values in ints (array_ref)
+ ! with aos_in_r_array(k)*aos_in_r_array(l)*weight over the grid, then with mo_coef for the
+ ! pairs ii <= jj of the first elec_alpha_num MOs, and print the resulting e_ref and e_new.
+ implicit none
+ integer :: i,j,ipoint,k,l
+ integer :: ii , jj
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision :: e_ref, e_new
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ double precision, allocatable :: ints(:,:,:)
+
+ allocate(ints(ao_num, ao_num, n_points_final_grid))
+ ! The read of the reference values (unit 33) is disabled below, which left ints
+ ! undefined when it was accumulated into array_ref: zero it so e_ref is reproducible.
+ ints = 0.d0
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     alternative reference: array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+ deallocate(ints, array, array_ref)
+
+ return
+
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+ ! Grid-contract int2_u2_j1b2_test (array) and int2_u2_j1b2 (array_ref) with two AOs,
+ ! then print the summed absolute and relative (|ref| > 1.d-10 only) differences divided by ao_num**4.
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ ! Debug check: sums int2_u_grad1u_x_j1b2_test and int2_u_grad1u_x_j1b2 over the grid and the component m,
+ ! weighted by aos_grad_in_r_array_transp(m,k,:) * aos_in_r_array(l,:), and prints their deviation / ao_num**4.
+ implicit none
+ integer :: ipoint, i, j, k, l, m
+ double precision :: weight, contrib, accu_abs, accu_relat
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+ allocate(array(ao_num,ao_num,ao_num,ao_num), array_ref(ao_num,ao_num,ao_num,ao_num))
+ array     = 0.d0
+ array_ref = 0.d0
+
+ ! grid quadrature: array receives the _test quantity, array_ref the reference one
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     = array(j,i,l,k)     + int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) = array_ref(j,i,l,k) + int2_u_grad1u_x_j1b2     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      end do
+     end do
+    end do
+   end do
+  end do
+ end do
+
+ ! element-wise comparison; the relative error is only accumulated where |array_ref| > 1.d-10
+ accu_abs   = 0.d0
+ accu_relat = 0.d0
+ do k = 1, ao_num
+  do l = 1, ao_num
+   do i = 1, ao_num
+    do j = 1, ao_num
+     contrib  = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+     accu_abs = accu_abs + contrib
+     if(dabs(array_ref(j,i,l,k)) > 1.d-10) accu_relat = accu_relat + contrib/dabs(array_ref(j,i,l,k))
+    end do
+   end do
+  end do
+ end do
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ ! Debug check: accumulates v_ij_u_cst_mu_j1b_test and v_ij_u_cst_mu_j1b over the final grid, each times
+ ! aos_in_r_array(k,:) * aos_in_r_array(l,:) * weight, and prints their deviation divided by ao_num**4.
+ implicit none
+ integer :: ipoint, i, j, k, l
+ double precision :: weight, contrib, accu_abs, accu_relat
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num,ao_num,ao_num,ao_num), array_ref(ao_num,ao_num,ao_num,ao_num))
+ array     = 0.d0
+ array_ref = 0.d0
+ ! grid quadrature: array receives the _test quantity, array_ref the reference one
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     = array(j,i,l,k)     + v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) = array_ref(j,i,l,k) + v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     end do
+    end do
+   end do
+  end do
+ end do
+ ! element-wise comparison; the relative error is only accumulated where |array_ref| > 1.d-10
+ accu_abs   = 0.d0
+ accu_relat = 0.d0
+ do k = 1, ao_num
+  do l = 1, ao_num
+   do i = 1, ao_num
+    do j = 1, ao_num
+     contrib  = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+     accu_abs = accu_abs + contrib
+     if(dabs(array_ref(j,i,l,k)) > 1.d-10) accu_relat = accu_relat + contrib/dabs(array_ref(j,i,l,k))
+    end do
+   end do
+  end do
+ end do
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+  ! Transforms fock_3e_uhf_ao_a/b with ao_to_mo_bi_ortho and compares them to fock_a/b_tot_3e_bi_orth.
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  thr_ih = 1d-7 ! deviation above which an element (j,i) is reported
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
+
+  ! --- alpha part
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! --- beta part
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_ao
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+  ! Element-wise comparison of fock_3e_uhf_mo_a/b with fock_a/b_tot_3e_bi_orth over all (j,i) in 1..mo_num.
+  implicit none
+  integer          :: i, j
+  double precision :: thr_ih, diff_ij, diff_tot, norm
+
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! --- alpha matrices
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij > thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        ! stop is disabled here, so every deviating element gets printed
+      end if
+
+      diff_tot = diff_tot + diff_ij
+      norm     = norm + dabs(fock_a_tot_3e_bi_orth(j,i))
+    end do
+  end do
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! --- beta matrices
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij > thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        ! stop is disabled here, so every deviating element gets printed
+      end if
+
+      diff_tot = diff_tot + diff_ij
+      norm     = norm + dabs(fock_b_tot_3e_bi_orth(j,i))
+    end do
+  end do
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+subroutine test_total_grad_lapl
+ ! Prints the deviation between tc_grad_and_lapl_ao_test and tc_grad_and_lapl_ao, summed over all four
+ ! indices (each 1..ao_num) and divided by ao_num**4; relative errors need |reference| > 1.d-10.
+ implicit none
+ integer :: i,j,k,l
+ double precision :: accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+end
+
+subroutine test_total_grad_square
+ ! Prints the deviation between tc_grad_square_ao_test and tc_grad_square_ao, summed over all four
+ ! indices (each 1..ao_num) and divided by ao_num**4; relative errors need |reference| > 1.d-10.
+ implicit none
+ integer :: i,j,k,l
+ double precision :: accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+end
+
+subroutine test_grid_points_ao
+ ! For each pair (j >= i), counts the grid points where |v_ij_u_cst_mu_j1b_test| > thr and those where
+ ! |v_ij_u_cst_mu_j1b_ng_1_test| > 0.1*thr, and prints every point that passes only the first criterion.
+ implicit none
+ integer :: i, j, ipoint
+ integer :: icount, icount_full, icount_good, icount_bad
+ double precision :: thr
+ thr = 1.d-10
+
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount      = 0
+   icount_good = 0
+   icount_bad  = 0
+   icount_full = 0
+   do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & 
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) &
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)) > thr*0.1d0) icount_full = icount_full + 1
+    if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)) > thr) then
+     icount = icount + 1
+     if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)) > thr*0.1d0) then
+      icount_good = icount_good + 1
+     else
+      print*,j,i,ipoint
+      print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+      icount_bad = icount_bad + 1
+     end if
+    end if
+   end do
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  end do
+ end do
+end
+
+subroutine test_int_gauss
+ ! Prints ao_prod_sigma, ao_overlap_abs_grid and ao_prod_center for every pair (j >= i), then integrates
+ ! exp(-alpha*r**2) and r*exp(-alpha*r**2) (origin-centered) on the final grid next to their closed forms.
+ implicit none
+ integer :: i,j
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! grid contributions: integral_1 -> (pi/alpha)**1.5 and integral_2 (extra factor r) -> 2*pi/alpha**2
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5
+ print*,'integral_2      =',integral_2
+ print*,'2*pi/alpha**2   =',2.d0*pi / (alpha)**2
+end
+
+! ---
+
+subroutine test_tc_grad_and_lapl_ao()
+  ! Compares tc_grad_and_lapl_ao_loop with tc_grad_and_lapl_ao element by element (all indices 1..ao_num),
+  ! reporting each element that differs by more than thr_ih and the total deviation over the loop norm.
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: thr_ih, diff, diff_tot, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i))
+          if(diff > thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_and_lapl_ao     (l,k,j,i)
+            ! stop is disabled here, so every deviating element gets printed
+          end if
+
+          diff_tot = diff_tot + diff
+          norm     = norm + dabs(tc_grad_and_lapl_ao_loop(l,k,j,i))
+        end do
+      end do
+    end do
+  end do
+
+  ! summed deviation relative to the summed |tc_grad_and_lapl_ao_loop|
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao()
+  ! Compares tc_grad_square_ao_loop with tc_grad_square_ao element by element (all indices 1..ao_num),
+  ! reporting each element that differs by more than thr_ih and the total deviation over the loop norm.
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: thr_ih, diff, diff_tot, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_square_ao tc_grad_square_ao_loop
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i))
+          if(diff > thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_square_ao     (l,k,j,i)
+            ! stop is disabled here, so every deviating element gets printed
+          end if
+
+          diff_tot = diff_tot + diff
+          norm     = norm + dabs(tc_grad_square_ao_loop(l,k,j,i))
+        end do
+      end do
+    end do
+  end do
+
+  ! summed deviation relative to the summed |tc_grad_square_ao_loop|
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+end
+
+! ---
+
+subroutine test_two_e_tc_non_hermit_integral()
+
+  ! Compares two_e_tc_non_hermit_integral_seq_alpha/beta with two_e_tc_non_hermit_integral_alpha/beta
+  ! for all (j,i) in 1..ao_num; prints deviating elements and the summed deviation over the seq norm.
+  implicit none
+  integer          :: i, j
+  double precision :: thr_ih, diff, diff_tot, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha
+  PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha
+
+  ! --- alpha matrices
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i))
+      if(diff > thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_alpha(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_alpha    (j,i)
+        ! stop is disabled here, so every deviating element gets printed
+      end if
+
+      diff_tot = diff_tot + diff
+      norm     = norm + dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i))
+    end do
+  end do
+
+  print *, ' diff tot a = ', diff_tot / norm
+  print *, '     norm a = ', norm
+  print *, ' '
+
+  ! --- beta matrices
+
+  diff_tot = 0.d0
+  norm     = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i))
+      if(diff > thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_beta(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_beta    (j,i)
+        ! stop is disabled here, so every deviating element gets printed
+      end if
+
+      diff_tot = diff_tot + diff
+      norm     = norm + dabs(two_e_tc_non_hermit_integral_seq_beta(j,i))
+    end do
+  end do
+
+  print *, ' diff tot b = ', diff_tot / norm
+  print *, '     norm b = ', norm
+  print *, ' '
+
+  ! ---
+
+end
+
+! ---
+
+>>>>>>> 92a4e33f8a21717cab0c0e4f8412ed6903afb04a
diff --git a/src/tc_scf/three_e_energy_bi_ortho.irp.f b/src/tc_scf/three_e_energy_bi_ortho.irp.f
new file mode 100644
index 00000000..64212da8
--- /dev/null
+++ b/src/tc_scf/three_e_energy_bi_ortho.irp.f
@@ -0,0 +1,174 @@
+
+subroutine contrib_3e_diag_sss(i,j,k,integral)
+ implicit none
+ integer, intent(in) :: i,j,k
+ BEGIN_DOC
+ ! returns in integral the pure same spin contribution of the orbital triplet (i,j,k) to the diagonal matrix element of the 3e term
+ END_DOC
+ double precision, intent(out) :: integral
+ double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+ call  give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int )!!! < i k j | i k j >
+ call  give_integrals_3_body_bi_ort(i, k, j, j, i, k, c_3_int)      ! < i k j | j i k >
+ call  give_integrals_3_body_bi_ort(i, k, j, k, j, i, c_minus_3_int)! < i k j | k j i >
+ integral = direct_int + c_3_int + c_minus_3_int ! direct term plus the two cyclic permutations of the ket
+ ! negative terms :: the three pair exchanges of the ket enter with a minus sign
+ call  give_integrals_3_body_bi_ort(i, k, j, j, k, i, exch_13_int)!!! < i k j | j k i > : E_13 
+ call  give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)!!! < i k j | i j k > : E_23
+ call  give_integrals_3_body_bi_ort(i, k, j, k, i, j, exch_12_int)!!! < i k j | k i j > : E_12
+ integral += - exch_13_int - exch_23_int  - exch_12_int 
+ integral = -integral ! the antisymmetrized sum is returned with an overall minus sign
+end
+
+subroutine contrib_3e_diag_soo(i,j,k,integral)
+ implicit none
+ integer, intent(in) :: i,j,k
+ BEGIN_DOC
+ ! returns the contribution of orbital i with the pair (j,k) to diagonal matrix element of 3e term: only the j <-> k exchange (E_23) is subtracted; the give_aab/give_abb callers take i from one spin and j,k from the other
+ END_DOC
+ double precision, intent(out) :: integral
+ double precision :: direct_int, exch_23_int
+ call  give_integrals_3_body_bi_ort(i, k, j, i, k, j, direct_int) ! < i k j | i k j >
+ call  give_integrals_3_body_bi_ort(i, k, j, i, j, k, exch_23_int)! < i k j | i j k > : E_23
+ integral = direct_int - exch_23_int ! direct term minus the single exchange between j and k
+ integral = -integral ! returned with an overall minus sign
+end
+
+
+subroutine give_aaa_contrib_bis(integral_aaa)
+ ! Accumulates contrib_3e_diag_sss(i,j,k) over the ordered triplets i < j < k <= elec_alpha_num.
+ implicit none
+ double precision, intent(out) :: integral_aaa
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_aaa = 0.d0
+ do i = 1, elec_alpha_num
+  do j = i+1, elec_alpha_num
+   do k = j+1, elec_alpha_num
+    call contrib_3e_diag_sss(i, j, k, integral)
+    integral_aaa = integral_aaa + integral
+   end do
+  end do
+ end do
+end subroutine give_aaa_contrib_bis
+
+subroutine give_aaa_contrib(integral_aaa)
+ implicit none
+ double precision, intent(out) :: integral_aaa
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_aaa = 0.d0  ! sum of contrib_3e_diag_sss over all (i,j,k) in 1..elec_alpha_num, scaled by 1/6 at the end
+ do i = 1, elec_alpha_num
+  do j = 1, elec_alpha_num
+   do k = 1, elec_alpha_num
+    call contrib_3e_diag_sss(i, j, k, integral)
+    integral_aaa = integral_aaa + integral
+   end do
+  end do
+ end do
+ integral_aaa *= 1.d0/6.d0 
+end subroutine give_aaa_contrib
+
+
+subroutine give_aab_contrib(integral_aab)
+ implicit none
+ double precision, intent(out) :: integral_aab
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_aab = 0.d0  ! i runs over 1..elec_beta_num, j and k independently over 1..elec_alpha_num; scaled by 0.5 at the end
+ do i = 1, elec_beta_num
+  do j = 1, elec_alpha_num
+   do k = 1, elec_alpha_num
+    call contrib_3e_diag_soo(i, j, k, integral)
+    integral_aab = integral_aab + integral
+   end do
+  end do
+ end do
+ integral_aab *= 0.5d0
+end subroutine give_aab_contrib
+
+
+subroutine give_aab_contrib_bis(integral_aab)
+ implicit none
+ double precision, intent(out) :: integral_aab
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_aab = 0.d0  ! i runs over 1..elec_beta_num, pairs j < k over 1..elec_alpha_num; no prefactor
+ do i = 1, elec_beta_num
+  do j = 1, elec_alpha_num
+   do k = j+1, elec_alpha_num
+    call contrib_3e_diag_soo(i, j, k, integral)
+    integral_aab = integral_aab + integral
+   end do
+  end do
+ end do
+end subroutine give_aab_contrib_bis
+
+
+subroutine give_abb_contrib(integral_abb)
+ implicit none
+ double precision, intent(out) :: integral_abb
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_abb = 0.d0  ! i runs over 1..elec_alpha_num, j and k independently over 1..elec_beta_num; scaled by 0.5 at the end
+ do i = 1, elec_alpha_num
+  do j = 1, elec_beta_num
+   do k = 1, elec_beta_num
+    call contrib_3e_diag_soo(i, j, k, integral)
+    integral_abb = integral_abb + integral
+   end do
+  end do
+ end do
+ integral_abb *= 0.5d0
+end subroutine give_abb_contrib
+
+subroutine give_abb_contrib_bis(integral_abb)
+ implicit none
+ double precision, intent(out) :: integral_abb
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_abb = 0.d0  ! i runs over 1..elec_alpha_num, pairs j < k over 1..elec_beta_num; no prefactor
+ do i = 1, elec_alpha_num
+  do j = 1, elec_beta_num
+   do k = j+1, elec_beta_num
+    call contrib_3e_diag_soo(i, j, k, integral)
+    integral_abb = integral_abb + integral
+   end do
+  end do
+ end do
+end subroutine give_abb_contrib_bis
+
+subroutine give_bbb_contrib_bis(integral_bbb)
+ ! Accumulates contrib_3e_diag_sss(i,j,k) over the ordered triplets i < j < k <= elec_beta_num.
+ implicit none
+ double precision, intent(out) :: integral_bbb
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_bbb = 0.d0
+ do i = 1, elec_beta_num
+  do j = i+1, elec_beta_num
+   do k = j+1, elec_beta_num
+    call contrib_3e_diag_sss(i, j, k, integral)
+    integral_bbb = integral_bbb + integral
+   end do
+  end do
+ end do
+end subroutine give_bbb_contrib_bis
+
+subroutine give_bbb_contrib(integral_bbb)
+ implicit none
+ double precision, intent(out) :: integral_bbb
+ integer                       :: i, j, k
+ double precision              :: integral
+ integral_bbb = 0.d0  ! sum of contrib_3e_diag_sss over all (i,j,k) in 1..elec_beta_num, scaled by 1/6 at the end
+ do i = 1, elec_beta_num
+  do j = 1, elec_beta_num
+   do k = 1, elec_beta_num
+    call contrib_3e_diag_sss(i, j, k, integral)
+    integral_bbb = integral_bbb + integral
+   end do
+  end do
+ end do
+ integral_bbb *= 1.d0/6.d0 
+end subroutine give_bbb_contrib
+
+
diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f
new file mode 100644
index 00000000..188bfa58
--- /dev/null
+++ b/src/utils/block_diag_degen.irp.f
@@ -0,0 +1,218 @@
+
+subroutine diag_mat_per_fock_degen(fock_diag, mat_ref, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval)
+
+
+  BEGIN_DOC
+  !
+  ! subroutine that diagonalizes a matrix mat_ref BY BLOCK
+  !
+  ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag"
+  !
+  ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together
+  !
+  !          : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together
+  !
+  !          : all elements having degeneracy 3 in fock_diag (i.e. three elements are equal) will be treated together
+  !
+  ! etc... the advantage is to guarantee no spurious mixing because of numerical problems. 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n ! dimension of fock_diag and of the square matrix mat_ref
+  double precision, intent(in)  :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg ! thr_d, thr_nd go to non_hrmt_bieig; thr_deg to give_degen_full_list
+  double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) ! columns reordered like the sorted eigenvalues
+
+  integer                       :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen
+  integer                       :: ii, jj, i_good, j_good, n_real
+  integer                       :: icount_eigval
+  logical,          allocatable :: is_ok(:)
+  integer,          allocatable :: list_degen(:,:), list_same_degen(:)
+  integer,          allocatable :: iorder(:), list_degen_sorted(:)
+  double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:)
+  double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:)
+
+  allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n))
+  leigvec_unsrtd = 0.d0
+  reigvec_unsrtd = 0.d0
+  eigval_unsrtd  = 0.d0
+
+  ! obtain degeneracies 
+  allocate(list_degen(n,0:n))
+  call give_degen_full_list(fock_diag, n, thr_deg, list_degen, n_degen_list)
+
+  allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list))
+  do i = 1, n_degen_list
+    n_degen = list_degen(i,0) 
+    list_degen_sorted(i) = n_degen
+    iorder(i) = i
+  enddo
+
+  ! sort by number of degeneracies 
+  call isort(list_degen_sorted, iorder, n_degen_list)
+
+  allocate(is_ok(n_degen_list))
+  is_ok = .True.
+  icount_eigval = 0
+
+  ! loop over degeneracies 
+  do i = 1, n_degen_list
+    if(.not.is_ok(i)) cycle
+
+    is_ok(i) = .False.
+    n_degen  = list_degen_sorted(i)
+
+    print *, ' diagonalizing for n_degen = ', n_degen
+
+    k = 1
+
+   ! group all the entries having the same degeneracies 
+!!  do while (list_degen_sorted(i+k)==n_degen)
+    do m = i+1, n_degen_list
+      if(list_degen_sorted(m)==n_degen) then
+        is_ok(i+k) = .False.
+        k += 1
+      endif
+    enddo
+
+    print *, ' number of identical degeneracies = ', k
+    size_mat = k*n_degen  
+    print *, ' size_mat = ', size_mat
+    allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat))
+    allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat))
+    ! group all the elements sharing the same degeneracy
+    icount = 0
+    do j = 1, k ! jth set of degeneracy
+      index_degen = iorder(i+j-1)
+      do m = 1, n_degen
+        icount += 1
+        list_same_degen(icount) = list_degen(index_degen,m)
+      enddo
+    enddo
+
+    print *, ' list of elements '
+    do icount = 1, size_mat
+      print *, icount, list_same_degen(icount)
+    enddo
+
+    ! you copy subset of matrix elements having all the same degeneracy in mat_tmp
+    do ii = 1, size_mat
+      i_good = list_same_degen(ii)
+      do jj = 1, size_mat
+        j_good = list_same_degen(jj)
+        mat_tmp(jj,ii) = mat_ref(j_good,i_good)
+      enddo
+    enddo
+
+    call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd &
+                       , leigvec_tmp, reigvec_tmp         & 
+                       , n_real, eigval_tmp )
+
+    do ii = 1, size_mat
+      icount_eigval += 1
+      eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues 
+      do jj = 1, size_mat ! copy the eigenvectors 
+        j_good = list_same_degen(jj)
+        leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii)
+        reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii)
+      enddo
+    enddo
+
+    deallocate(mat_tmp, list_same_degen)
+    deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp)
+  enddo
+
+  if(icount_eigval .ne. n) then
+    print *, ' pb !! (icount_eigval.ne.n)'
+    print *, ' icount_eigval,n', icount_eigval, n
+    stop
+  endif
+ 
+  deallocate(iorder)
+  allocate(iorder(n))
+  do i = 1, n
+    iorder(i) = i
+  enddo
+  call dsort(eigval_unsrtd, iorder, n) ! below, eigval_unsrtd is read as sorted in place and iorder(i) as the original column
+
+  print*,'sorted eigenvalues '
+  do i = 1, n
+    i_good = iorder(i)
+    eigval(i) = eigval_unsrtd(i)
+    print*,'i,eigval(i) = ',i,eigval(i)
+    do j = 1, n
+      leigvec(j,i) = leigvec_unsrtd(j,i_good)
+      reigvec(j,i) = reigvec_unsrtd(j,i_good)
+    enddo
+  enddo
+
+  deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd)
+  deallocate(list_degen)
+  deallocate(iorder, list_degen_sorted)
+  deallocate(is_ok)
+
+end
+
+! ---
+
+subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list)
+
+  BEGIN_DOC
+  ! you enter with an array A(n) and it returns the n_degen_list groups of elements degenerated up to thr (|A(i)-A(j)| < thr, A(i) being the first element of the group)
+  !
+  ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL 
+  !
+  ! list_degen(i,0) = number of degenerate entries 
+  !
+  ! list_degen(i,1) = index of the first degenerate entry
+  !
+  ! list_degen(i,2:list_degen(i,0)) = list of all other degenerate entries 
+  !
+  ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n)
+  double precision, intent(in)  :: thr
+  integer,          intent(out) :: list_degen(n,0:n), n_degen_list
+  integer                       :: i, j, icount, icheck
+  logical, allocatable          :: is_ok(:)
+
+  ! is_ok(j) stays .True. until element j has been assigned to a group
+  allocate(is_ok(n))
+  n_degen_list = 0
+  is_ok = .True.
+  do i = 1, n
+    if(.not.is_ok(i)) cycle
+    n_degen_list +=1
+    is_ok(i) = .False.
+    list_degen(n_degen_list,1) = i
+    icount = 1
+    do j = i+1, n
+      if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then
+        is_ok(j) = .False.
+        icount += 1
+        list_degen(n_degen_list,icount) = j
+      endif
+    enddo
+
+    list_degen(n_degen_list,0) = icount
+  enddo
+
+  icheck = 0
+  do i = 1, n_degen_list
+    icheck += list_degen(i,0)
+  enddo
+
+  if(icheck.ne.n)then
+    print *, ' pb ! :: icheck.ne.n'
+    print *, icheck, n
+    stop
+  endif
+  deallocate(is_ok)
+end
+
+! ---
+
diff --git a/src/utils/loc.f b/src/utils/loc.f
new file mode 100644
index 00000000..02693281
--- /dev/null
+++ b/src/utils/loc.f
@@ -0,0 +1,327 @@
+c************************************************************************
+      subroutine maxovl(n,m,s,t,w)
+C
+C     Iterative search, by successive 2x2 rotations of pairs of vectors,
+C     for the unitary transformation of a set of n vectors which
+C     maximizes the sum of their square overlaps with a set of m
+C     reference vectors (m.le.n). Progress is written on unit 6.
+C
+C     S: overlap matrix <ref|vec>; modified during the iterations, its
+C        rows 1..m are set back to their input values before returning
+C     T: rotation matrix, accumulated starting from the identity
+C     W: new overlap matrix on exit (only rows 1..m are assigned)
+C
+      implicit real*8(a-h,o-y),logical*1(z)
+!      parameter (id1=700)
+!      dimension s(id1,id1),t(id1,id1),w(id1,id1)
+      double precision, intent(inout) :: s(n,n)
+      double precision, intent(out) :: t(n,n),w(n,n)
+      data small/1.d-6/
+C     zprt: print switch, niter: max. sweeps, conv: threshold on |sine|
+      zprt=.true.
+      niter=1000000
+      conv=1.d-12
+
+C      niter=1000000
+C      conv=1.d-6
+      write (6,5) n,m,conv
+    5 format (//5x,'Unitary transformation of',i3,'  vectors'/
+     * 5x,'following the principle of maximum overlap with a set of',
+     * i3,' reference vectors'/5x,'required convergence on rotation ',
+     * 'angle =',f13.10///5x,'Starting overlap matrix'/)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (s(i,j),j=1,n)
+      end do
+    8 mm=m-1
+      if (m.lt.n) mm=m
+      iter=0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        end do
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      end do
+      sum=sum/m
+      if (zprt) write (6,12) sum
+   12 format (//5x,'Average square overlap =',f10.6)
+      if (n.eq.1) goto 100
+      last=n
+      j=1
+   21 if (j.ge.last) goto 30
+      sum=0.d0
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
+      if (sum.gt.small) goto 28
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
+      last=last-1
+      goto 21
+   28 j=j+1
+      goto 21
+   30 iter=iter+1
+      imax=0
+      jmax=0
+      dmax=0.d0
+      amax=0.d0
+      do 60 i=1,mm
+      ip=i+1
+      do 50 j=ip,n
+      a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+      b=-s(i,i)*s(i,j)
+      if (j.gt.m) goto 31
+      a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+      b=b+s(j,i)*s(j,j)
+   31 b=b+b
+      if (a.eq.0.d0) goto 32
+      ba=b/a
+      if (dabs(ba).gt.small) goto 32
+      if (a.gt.0.d0) goto 33
+      tang=-0.5d0*ba
+      cosine=1.d0/dsqrt(1.d0+tang*tang)
+      sine=tang*cosine
+      goto 34
+   32 tang=0.d0
+      if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+      cosine=1.d0/dsqrt(1.d0+tang*tang)
+      sine=tang*cosine
+      goto 34
+   33 cosine=0.d0
+      sine=1.d0
+   34 delta=sine*(a*sine+b*cosine)
+      if (zprt.and.delta.lt.0.d0) write (6,71) i,j,a,b,sine,cosine,delta
+      do k=1,m
+        p=s(k,i)*cosine-s(k,j)*sine
+        q=s(k,i)*sine+s(k,j)*cosine
+        s(k,i)=p
+        s(k,j)=q
+      enddo
+      do k=1,n
+        p=t(k,i)*cosine-t(k,j)*sine
+        q=t(k,i)*sine+t(k,j)*cosine
+        t(k,i)=p
+        t(k,j)=q
+      enddo
+   45 d=dabs(sine)
+      if (d.le.amax) goto 50
+      imax=i
+      jmax=j
+      amax=d
+      dmax=delta
+   50 continue
+   60 continue
+      if (zprt) write (6,70) iter,amax,imax,jmax,dmax
+   70 format (' iter=',i4,' largest rotation=',f12.8,
+     * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
+   71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
+      if (amax.lt.conv) goto 100
+      if (iter.lt.niter) goto 30
+      write (6,80)
+      write (6,*) 'niter=',niter
+   80 format (//5x,'*** maximum number of cycles exceeded ',
+     * 'in subroutine maxovl ***'//)
+      stop
+  100 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
+      if (.not.zprt) return
+      write (6,12) sum
+      write (6,130)
+  130 format (//5x,'transformation matrix')
+      do i=1,n
+        write (6,145) i
+        write (6,150) (t(i,j),j=1,n)
+      enddo
+  145 format (i8)
+  150 format (2x,10f12.8)
+      write (6,160)
+  160 format (//5x,'new overlap matrix'/)
+      do i=1,m
+        write (6,145) i
+        write (6,150) (w(i,j),j=1,n)
+      enddo
+      return
+      end
+
+
+c************************************************************************
+      subroutine maxovl_no_print(n,m,s,t,w)
+C
+C     Silent variant of maxovl: same iterative search for the unitary
+C     transformation of n vectors maximizing the sum of their square
+C     overlaps with m reference vectors (m.le.n), with zprt=.false.
+C     (no output) and a convergence threshold conv=1.d-8.
+C
+C     S: overlap matrix <ref|vec>   T: rotation matrix
+C     W: new overlap matrix
+C     NOTE(review): s,t,w are dimensioned (300,300) here, so callers
+C     must pass arrays with leading dimension 300, unlike maxovl
+C     which uses (n,n) -- confirm against the callers.
+      implicit real*8(a-h,o-y),logical*1(z)
+      parameter (id1=300)
+      dimension s(id1,id1),t(id1,id1),w(id1,id1)
+      data small/1.d-6/
+
+      zprt=.false.
+      niter=1000000
+      conv=1.d-8
+C     NOTE: stops without any message if niter sweeps are exceeded
+C      niter=1000000
+C      conv=1.d-6
+    8 mm=m-1
+      if (m.lt.n) mm=m
+      iter=0
+      do j=1,n
+        do i=1,n
+          t(i,j)=0.d0
+        enddo
+        do i=1,m
+          w(i,j)=s(i,j)
+        enddo
+        t(j,j)=1.d0
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+   12 format (//5x,'Average square overlap =',f10.6)
+      if (n.eq.1) goto 100
+      last=n
+      j=1
+   21 if (j.ge.last) goto 30
+      sum=0.d0
+
+      do i=1,n
+        sum=sum+s(i,j)*s(i,j)
+      enddo
+      if (sum.gt.small) goto 28
+      do i=1,n
+        sij=s(i,j)
+        s(i,j)=-s(i,last)
+        s(i,last)=sij
+        tij=t(i,j)
+        t(i,j)=-t(i,last)
+        t(i,last)=tij
+      end do
+      last=last-1
+      goto 21
+   28 j=j+1
+      goto 21
+   30 iter=iter+1
+      imax=0
+      jmax=0
+      dmax=0.d0
+      amax=0.d0
+      do i=1,mm
+        ip=i+1
+        do j=ip,n
+          a=s(i,j)*s(i,j)-s(i,i)*s(i,i)
+          b=-s(i,i)*s(i,j)
+          if (j.gt.m) goto 31
+          a=a+s(j,i)*s(j,i)-s(j,j)*s(j,j)
+          b=b+s(j,i)*s(j,j)
+  31      b=b+b
+          if (a.eq.0.d0) goto 32
+          ba=b/a
+          if (dabs(ba).gt.small) goto 32
+          if (a.gt.0.d0) goto 33
+          tang=-0.5d0*ba
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  32      tang=0.d0
+          if (b.ne.0.d0) tang=(a+dsqrt(a*a+b*b))/b
+          cosine=1.d0/dsqrt(1.d0+tang*tang)
+          sine=tang*cosine
+          goto 34
+  33      cosine=0.d0
+          sine=1.d0
+  34      delta=sine*(a*sine+b*cosine)
+          do k=1,m
+            p=s(k,i)*cosine-s(k,j)*sine
+            q=s(k,i)*sine+s(k,j)*cosine
+            s(k,i)=p
+            s(k,j)=q
+          enddo
+          do k=1,n
+            p=t(k,i)*cosine-t(k,j)*sine
+            q=t(k,i)*sine+t(k,j)*cosine
+            t(k,i)=p
+            t(k,j)=q
+          enddo
+  45      d=dabs(sine)
+          if (d.le.amax) goto 50
+          imax=i
+          jmax=j
+          amax=d
+          dmax=delta
+  50      continue
+        end do
+      end do
+   70 format (' iter=',i4,' largest rotation=',f12.8,
+     * ', vectors',i3,' and',i3,', incr. of diag. squares=',g12.5)
+   71 format (' i,j,a,b,sin,cos,delta =',2i3,5f10.5)
+      if (amax.lt.conv) goto 100
+      if (iter.lt.niter) goto 30
+   80 format (//5x,'*** maximum number of cycles exceeded ',
+     * 'in subroutine maxovl ***'//)
+      stop
+  100 continue
+      do j=1,n
+        if (s(j,j).gt.0.d0) cycle
+        do i=1,m
+          s(i,j)=-s(i,j)
+        enddo
+        do i=1,n
+          t(i,j)=-t(i,j)
+        enddo
+      enddo
+      sum=0.d0
+      do i=1,m
+        sum=sum+s(i,i)*s(i,i)
+      enddo
+      sum=sum/m
+      do i=1,m
+        do j=1,n
+          sw=s(i,j)
+          s(i,j)=w(i,j)
+          w(i,j)=sw
+        enddo
+      enddo
+      return
+      end
+

From 2ec8b1f34c2862a470af9dfe21ebc301a2103351 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 7 Feb 2023 13:27:19 +0100
Subject: [PATCH 6/7] added missing bi_ort_ints

---
 src/bi_ort_ints/NEED                         |   4 +
 src/bi_ort_ints/README.rst                   |  25 ++
 src/bi_ort_ints/bi_ort_ints.irp.f            |  44 +++
 src/bi_ort_ints/biorthog_mo_for_h.irp.f      | 153 ++++++++
 src/bi_ort_ints/one_e_bi_ort.irp.f           |  75 ++++
 src/bi_ort_ints/semi_num_ints_mo.irp.f       | 318 ++++++++++++++++
 src/bi_ort_ints/three_body_ijm.irp.f         | 366 +++++++++++++++++++
 src/bi_ort_ints/three_body_ijmk.irp.f        | 284 ++++++++++++++
 src/bi_ort_ints/three_body_ijmkl.irp.f       | 296 +++++++++++++++
 src/bi_ort_ints/three_body_ints_bi_ort.irp.f | 207 +++++++++++
 src/bi_ort_ints/total_twoe_pot.irp.f         | 250 +++++++++++++
 src/tc_scf/test_Ne.sh                        |   4 +-
 12 files changed, 2024 insertions(+), 2 deletions(-)
 create mode 100644 src/bi_ort_ints/NEED
 create mode 100644 src/bi_ort_ints/README.rst
 create mode 100644 src/bi_ort_ints/bi_ort_ints.irp.f
 create mode 100644 src/bi_ort_ints/biorthog_mo_for_h.irp.f
 create mode 100644 src/bi_ort_ints/one_e_bi_ort.irp.f
 create mode 100644 src/bi_ort_ints/semi_num_ints_mo.irp.f
 create mode 100644 src/bi_ort_ints/three_body_ijm.irp.f
 create mode 100644 src/bi_ort_ints/three_body_ijmk.irp.f
 create mode 100644 src/bi_ort_ints/three_body_ijmkl.irp.f
 create mode 100644 src/bi_ort_ints/three_body_ints_bi_ort.irp.f
 create mode 100644 src/bi_ort_ints/total_twoe_pot.irp.f

diff --git a/src/bi_ort_ints/NEED b/src/bi_ort_ints/NEED
new file mode 100644
index 00000000..3ca12d93
--- /dev/null
+++ b/src/bi_ort_ints/NEED
@@ -0,0 +1,4 @@
+non_h_ints_mu
+ao_tc_eff_map
+bi_ortho_mos
+tc_keywords
diff --git a/src/bi_ort_ints/README.rst b/src/bi_ort_ints/README.rst
new file mode 100644
index 00000000..d496c4f7
--- /dev/null
+++ b/src/bi_ort_ints/README.rst
@@ -0,0 +1,25 @@
+===========
+bi_ort_ints
+===========
+
+This module contains all the integrals necessary for the TC Hamiltonian in a bi-orthonormal (BO) MO basis.
+See bi_ortho_basis for more information.
+The main providers are:
+
+One-electron integrals 
+----------------------
++) ao_one_e_integrals_tc_tot : total one-electron Hamiltonian, which may include a non-Hermitian part coming from the one-electron correlation factor.
++) mo_bi_ortho_tc_one_e : one-electron Hamiltonian (h_core+one-J terms) on the BO-MO basis. 
++) mo_bi_orth_bipole_x  : x-component of the dipole operator on the BO-MO basis. (Same for y,z) 
+
+Two-electron integrals 
+----------------------
++) ao_two_e_tc_tot : Total two-electron operator (including the non-hermitian term of the TC Hamiltonian) on the AO basis
++) mo_bi_ortho_tc_two_e : Total two-electron operator on the BO-MO basis
+
+Three-electron integrals 
+------------------------
++) three_body_ints_bi_ort : 6-index three-electron tensor (-L) on the BO-MO basis. WARNING :: N^6 storage !
++) three_e_3_idx_direct_bi_ort : DIRECT term with 3 different indices of the -L operator. These terms appear in the DIAGONAL matrix element of the -L operator. The 5 other permutations needed to compute matrix elements can be found in three_body_ijm.irp.f 
++) three_e_4_idx_direct_bi_ort : DIRECT term with 4 different indices of the -L operator. These terms appear in the OFF-DIAGONAL matrix element of the -L operator including SINGLE EXCITATIONS. The 5 other permutations needed to compute matrix elements can be found in three_body_ijmk.irp.f 
++) three_e_5_idx_direct_bi_ort : DIRECT term with 5 different indices of the -L operator. These terms appear in the OFF-DIAGONAL matrix element of the -L operator including DOUBLE EXCITATIONS. The 5 other permutations needed to compute matrix elements can be found in three_body_ijmkl.irp.f 
diff --git a/src/bi_ort_ints/bi_ort_ints.irp.f b/src/bi_ort_ints/bi_ort_ints.irp.f
new file mode 100644
index 00000000..ca50dd56
--- /dev/null
+++ b/src/bi_ort_ints/bi_ort_ints.irp.f
@@ -0,0 +1,44 @@
+program bi_ort_ints
+  implicit none
+  BEGIN_DOC
+! Test program: sets my_grid_becke = .True., my_n_pt_r_grid = 10 and my_n_pt_a_grid = 14, touches them, then calls test_3e
+  END_DOC
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 10
+  my_n_pt_a_grid = 14
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call test_3e
+end
+
+subroutine test_3e
+ implicit none
+ BEGIN_DOC
+ ! Compares give_integrals_3_body_bi_ort with give_integrals_3_body_bi_ort_old on all mo_num^6 index combinations:
+ ! differences above 1.d-10 are printed, then the absolute difference averaged over all the combinations
+ END_DOC
+ integer :: i,k,j,l,m,n
+ double precision :: accu, contrib,new,ref
+ accu = 0.d0
+ do i = 1, mo_num
+  do k = 1, mo_num
+   do j = 1, mo_num
+    do l = 1, mo_num
+     do m = 1, mo_num
+      do n = 1, mo_num
+        call give_integrals_3_body_bi_ort(n, l, k, m, j, i, new)
+        call give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, ref)
+        contrib = dabs(new - ref)
+        accu += contrib
+        if(contrib .gt. 1.d-10)then
+         print*,'pb !!'
+         print*,i,k,j,l,m,n
+         print*,ref,new,contrib
+        endif
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ print*,'accu = ',accu/dble(mo_num)**6
+end
diff --git a/src/bi_ort_ints/biorthog_mo_for_h.irp.f b/src/bi_ort_ints/biorthog_mo_for_h.irp.f
new file mode 100644
index 00000000..452c13f1
--- /dev/null
+++ b/src/bi_ort_ints/biorthog_mo_for_h.irp.f
@@ -0,0 +1,153 @@
+
+! ---
+
+double precision function bi_ortho_mo_coul_ints(l, k, j, i)
+
+  BEGIN_DOC
+  ! Two-electron Coulomb integral on the bi-orthonormal MO basis, built by a
+  ! direct four-fold sum over AOs: < mo^L_k mo^L_l | 1/r12 | mo^R_i mo^R_j >
+  END_DOC
+
+  implicit none
+  integer, intent(in) :: i, j, k, l
+  integer             :: m_l, n_k, p_j, q_i
+  double precision    :: acc
+
+  acc = 0.d0
+  do m_l = 1, ao_num
+    do p_j = 1, ao_num
+      do n_k = 1, ao_num
+        do q_i = 1, ao_num
+          acc = acc + ao_two_e_coul(n_k,q_i,m_l,p_j) * mo_l_coef(m_l,l) * mo_l_coef(n_k,k) * mo_r_coef(p_j,j) * mo_r_coef(q_i,i)
+        enddo
+      enddo
+    enddo
+  enddo
+  bi_ortho_mo_coul_ints = acc
+
+end function bi_ortho_mo_coul_ints
+
+! ---
+
+! TODO :: transform into DGEMM
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_coul_e_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Four-index AO -> MO transformation of ao_two_e_coul, one index at a time (left, right, left, right coefficients):
+  ! mo_bi_ortho_coul_e_chemist(k,i,l,j) = < k l | 1/r12 | i j > where i,j are right MOs and k,l are left MOs
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, m, n, p, q
+  double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:)
+
+  allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          do k = 1, mo_num
+            !       (k n|p m)    = sum_q c_qk * (q n|p m)
+            mo_tmp_1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_coul(q,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num))
+  mo_tmp_2 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            !       (k i|p m) = sum_n c_ni * (k n|p m)
+            mo_tmp_2(k,i,p,m) += mo_r_coef_transp(i,n) * mo_tmp_1(k,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+  ! third index (left coefficients): (k i|l m) = sum_p c_pl * (k i|p m)
+  allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num))
+  mo_tmp_1 = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_tmp_1(k,i,l,m) += mo_l_coef_transp(l,p) * mo_tmp_2(k,i,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_2)
+  ! fourth index (right coefficients): (k i|l j) = sum_m c_mj * (k i|l m)
+  mo_bi_ortho_coul_e_chemist = 0.d0 
+  do m = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_bi_ortho_coul_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * mo_tmp_1(k,i,l,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_coul_e, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Physicist-notation copy of mo_bi_ortho_coul_e_chemist:
+  !
+  ! mo_bi_ortho_coul_e(k,l,i,j) = < k l | 1/r12 | i j > = (k i|l j)
+  !
+  ! where i,j are right MOs and k,l are left MOs
+  END_DOC
+
+  implicit none
+  integer :: i, j, l
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        mo_bi_ortho_coul_e(1:mo_num,l,i,j) = mo_bi_ortho_coul_e_chemist(1:mo_num,i,l,j)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_bi_ortho_one_e, (mo_num, mo_num)]
+
+  BEGIN_DOC 
+  ! ao_one_e_integrals transformed to the bi-orthonormal MO basis by ao_to_mo_bi_ortho:
+  ! mo_bi_ortho_one_e(k,i) = < MO^L_k | h_c | MO^R_i >
+  !
+  END_DOC
+
+  implicit none
+
+  call ao_to_mo_bi_ortho(ao_one_e_integrals, ao_num, mo_bi_ortho_one_e , mo_num)
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/one_e_bi_ort.irp.f b/src/bi_ort_ints/one_e_bi_ort.irp.f
new file mode 100644
index 00000000..8997991d
--- /dev/null
+++ b/src/bi_ort_ints/one_e_bi_ort.irp.f
@@ -0,0 +1,75 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_one_e_integrals_tc_tot, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  ! ao_one_e_integrals, plus the j1b_gauss_hermI, j1b_gauss_hermII and
+  ! j1b_gauss_nonherm one-electron terms when j1b_type is 1 or 2
+  END_DOC
+
+  implicit none
+  integer :: irow, icol
+  provide j1b_type
+
+  ao_one_e_integrals_tc_tot = ao_one_e_integrals
+
+  select case(j1b_type)
+    case(1, 2)
+      do icol = 1, ao_num
+        do irow = 1, ao_num
+          ao_one_e_integrals_tc_tot(irow,icol) = ao_one_e_integrals_tc_tot(irow,icol) + ( j1b_gauss_hermI(irow,icol) + j1b_gauss_hermII(irow,icol) + j1b_gauss_nonherm(irow,icol) )
+        enddo
+      enddo
+  end select
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_one_e, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! ao_one_e_integrals_tc_tot transformed to the bi-orthonormal MO basis by ao_to_mo_bi_ortho:
+  ! mo_bi_ortho_tc_one_e(k,i) = <MO^L_k | h_c | MO^R_i>
+  !
+  END_DOC
+
+  implicit none
+ 
+  call ao_to_mo_bi_ortho(ao_one_e_integrals_tc_tot, ao_num, mo_bi_ortho_tc_one_e, mo_num)
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_x , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_y , (mo_num,mo_num)]
+&BEGIN_PROVIDER [double precision, mo_bi_orth_bipole_z , (mo_num,mo_num)]
+
+ BEGIN_DOC
+ ! Dipole integrals on the bi-orthonormal MO basis, obtained by applying
+ ! ao_to_mo_bi_ortho to ao_dipole_x, ao_dipole_y and ao_dipole_z:
+ !
+ ! array of the integrals of MO_i * x MO_j
+ ! array of the integrals of MO_i * y MO_j
+ ! array of the integrals of MO_i * z MO_j
+ !
+ ! the leading dimensions passed are those of the arrays themselves
+ END_DOC
+
+ implicit none
+
+ ! x component
+ call ao_to_mo_bi_ortho( ao_dipole_x        , size(ao_dipole_x        , 1) &
+                       , mo_bi_orth_bipole_x, size(mo_bi_orth_bipole_x, 1) )
+
+ ! y component
+ call ao_to_mo_bi_ortho( ao_dipole_y        , size(ao_dipole_y        , 1) &
+                       , mo_bi_orth_bipole_y, size(mo_bi_orth_bipole_y, 1) )
+
+ ! z component
+ call ao_to_mo_bi_ortho( ao_dipole_z        , size(ao_dipole_z        , 1) &
+                       , mo_bi_orth_bipole_z, size(mo_bi_orth_bipole_z, 1) )
+
+END_PROVIDER
+
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
new file mode 100644
index 00000000..4694a998
--- /dev/null
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -0,0 +1,318 @@
+
+! ---
+
+! TODO :: optimization : transform into a DGEMM
+
+BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! Obtained as 0.5 * v_ij_erf_rk_cst_mu(:,:,ip) transformed by ao_to_mo_bi_ortho:
+  ! mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,ip) = int dr chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1 )/(2|r - R_ip|) on the BI-ORTHO MO basis 
+  ! 
+  ! where chi_k(r) is a LEFT MO and phi_i(r) is a RIGHT MO
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  !
+  END_DOC
+
+  implicit none
+  integer :: ipoint
+ !$OMP PARALLEL         &
+ !$OMP DEFAULT (NONE)   &
+ !$OMP PRIVATE (ipoint) & 
+ !$OMP SHARED (n_points_final_grid,v_ij_erf_rk_cst_mu,mo_v_ki_bi_ortho_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    call ao_to_mo_bi_ortho( v_ij_erf_rk_cst_mu            (1,1,ipoint), size(v_ij_erf_rk_cst_mu,             1) &
+                          , mo_v_ki_bi_ortho_erf_rk_cst_mu(1,1,ipoint), size(mo_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  mo_v_ki_bi_ortho_erf_rk_cst_mu = mo_v_ki_bi_ortho_erf_rk_cst_mu * 0.5d0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Copy of mo_v_ki_bi_ortho_erf_rk_cst_mu with the grid index moved first:
+  !
+  ! mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,k,i) = mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,ip)
+  !
+  ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the BI-ORTHO MO basis
+  END_DOC
+
+  implicit none
+  integer :: i, k
+
+  do i = 1, mo_num
+    do k = 1, mo_num
+      mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(1:n_points_final_grid,k,i) = mo_v_ki_bi_ortho_erf_rk_cst_mu(k,i,1:n_points_final_grid)
+    enddo
+  enddo
+
+! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu
+
+END_PROVIDER
+
+! ---
+
+! TODO :: optimization : transform into a DGEMM
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu, (mo_num, mo_num, 3, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! mo_x_v_ki_bi_ortho_erf_rk_cst_mu(k,i,m,ip) = int dr x(m) * chi_k(r) phi_i(r) (erf(mu |r - R_ip|) - 1)/2|r - R_ip| on the BI-ORTHO MO basis
+  !
+  ! chi_k(r) is a left MO, phi_i(r) a right MO, and x(m) = x, y, z for m = 1, 2, 3
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  !
+  ! Each (m,ip) block of x_v_ij_erf_rk_cst_mu_transp is transformed by
+  ! ao_to_mo_bi_ortho, then the whole array is scaled by 0.5
+  !
+  END_DOC
+
+  implicit none
+  integer :: ipoint, m
+
+ !$OMP PARALLEL            &
+ !$OMP DEFAULT (NONE)      &
+ !$OMP PRIVATE (ipoint, m) &
+ !$OMP SHARED (n_points_final_grid,x_v_ij_erf_rk_cst_mu_transp,mo_x_v_ki_bi_ortho_erf_rk_cst_mu)
+ !$OMP DO SCHEDULE (dynamic)
+  do ipoint = 1, n_points_final_grid
+    ! one AO -> bi-ortho MO transformation per cartesian component
+    do m = 1, 3
+      call ao_to_mo_bi_ortho( x_v_ij_erf_rk_cst_mu_transp     (1,1,m,ipoint), size(x_v_ij_erf_rk_cst_mu_transp,      1) &
+                            , mo_x_v_ki_bi_ortho_erf_rk_cst_mu(1,1,m,ipoint), size(mo_x_v_ki_bi_ortho_erf_rk_cst_mu, 1) )
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  mo_x_v_ki_bi_ortho_erf_rk_cst_mu = 0.5d0 * mo_x_v_ki_bi_ortho_erf_rk_cst_mu
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int2_grad1_u12_ao_transp(j,i,m,ip) = int2_grad1_u12_ao(j,i,ip,m),
+  ! or int2_grad1_u12_ao_test(j,i,ip,m) when test_cycle_tc is true
+  END_DOC
+
+  implicit none
+  integer          :: i, m, ipoint
+  double precision :: wall0, wall1
+
+  print *, ' providing int2_grad1_u12_ao_transp ...'
+  call wall_time(wall0)
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do m = 1, 3
+       do i = 1, ao_num
+         int2_grad1_u12_ao_transp(1:ao_num,i,m,ipoint) = int2_grad1_u12_ao_test(1:ao_num,i,ipoint,m)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do m = 1, 3
+       do i = 1, ao_num
+         int2_grad1_u12_ao_transp(1:ao_num,i,m,ipoint) = int2_grad1_u12_ao(1:ao_num,i,ipoint,m)
+       enddo
+     enddo
+   enddo
+  endif
+  call wall_time(wall1)
+  print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! Each (m,ip) block of int2_grad1_u12_ao_transp transformed by ao_to_mo_bi_ortho
+  END_DOC
+
+  implicit none
+  integer :: ipoint, m
+  double precision :: wall0, wall1
+
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+  call wall_time(wall0)
+  !$OMP PARALLEL            &
+  !$OMP DEFAULT (NONE)      &
+  !$OMP PRIVATE (ipoint, m) &
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     do m = 1, 3
+       call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,m,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                             , int2_grad1_u12_bimo_transp(1,1,m,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     enddo
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
+ ! Reordered copy of int2_grad1_u12_bimo_transp:
+ ! int2_grad1_u12_bimo_t(ip,m,j,i) = int2_grad1_u12_bimo_transp(j,i,m,ip)
+ implicit none
+ integer          :: i, j, ipoint
+ do ipoint = 1, n_points_final_grid
+   do i = 1, mo_num
+     do j = 1, mo_num
+      int2_grad1_u12_bimo_t(ipoint,1:3,j,i) = int2_grad1_u12_bimo_transp(j,i,1:3,ipoint)
+     enddo
+   enddo
+ enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
+  BEGIN_DOC
+  ! int2_grad1_u12_ao_t(ip,m,j,i) = int2_grad1_u12_ao(j,i,ip,m)
+  END_DOC
+  implicit none
+  integer :: i, j, ipoint
+
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1:3,j,i) = int2_grad1_u12_ao(j,i,ipoint,1:3)
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp, (n_points_final_grid, 3, mo_num, mo_num)]
+  BEGIN_DOC
+  ! mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,m,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,m,ip)
+  END_DOC
+  implicit none
+  integer :: i, j, m
+
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, 3
+        mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(1:n_points_final_grid,m,j,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu(j,i,m,1:n_points_final_grid)
+      enddo
+    enddo
+  enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk, (n_points_final_grid, 3, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Computed as mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,m,k,i) - R_ip(m) * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,k,i):
+  ! x_W_ki_bi_ortho_erf_rk(ip,m,k,i) = \int dr chi_k(r) \frac{(1 - erf(mu |r-R_ip|))}{2|r-R_ip|} (x(m)-R_ip(m)) phi_i(r) ON THE BI-ORTHO MO BASIS 
+  ! NOTE(review): the docs of the underlying mo_*_erf_rk_cst_mu providers carry (erf - 1), whereas (1 - erf) is written here -- confirm the overall sign
+  ! where chi_k(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z,
+  !
+  ! R_ip = the "ip"-th point of the DFT Grid
+  END_DOC
+ 
+  implicit none
+  include 'constants.include.F'
+ 
+  integer          :: ipoint, m, i, k
+  double precision :: xyz
+  double precision :: wall0, wall1
+ 
+  print*, ' providing x_W_ki_bi_ortho_erf_rk ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (ipoint,m,i,k,xyz) & 
+ !$OMP SHARED (x_W_ki_bi_ortho_erf_rk,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do m = 1, 3
+        do ipoint = 1, n_points_final_grid
+          xyz = final_grid_points(m,ipoint)
+          x_W_ki_bi_ortho_erf_rk(ipoint,m,k,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,k,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ ! FREE mo_v_ki_bi_ortho_erf_rk_cst_mu_transp 
+ ! FREE mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp
+
+  call wall_time(wall1)
+  print *, ' time to provide x_W_ki_bi_ortho_erf_rk = ', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_W_ki_bi_ortho_erf_rk_diag, (n_points_final_grid, 3, mo_num)]
+  BEGIN_DOC
+  ! x_W_ki_bi_ortho_erf_rk_diag(ip,m,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,m,i,i) - R_ip(m) * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ip,i,i)
+! i.e. the k = i (diagonal) elements of the expression used for x_W_ki_bi_ortho_erf_rk, ON THE BI-ORTHO MO BASIS
+! where chi_i(r)/phi_i(r) are left/right MOs, m=1 => X(m) = x, m=2 => X(m) = y, m=3 => X(m) = z,
+!
+! R_ip = the "ip"-th point of the DFT Grid
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+ 
+  integer          :: ipoint, m, i
+  double precision :: xyz
+  double precision :: wall0, wall1
+ 
+  print*,'providing x_W_ki_bi_ortho_erf_rk_diag ...'
+  call wall_time(wall0)
+
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (ipoint,m,i,xyz) & 
+ !$OMP SHARED (x_W_ki_bi_ortho_erf_rk_diag,n_points_final_grid,mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_v_ki_bi_ortho_erf_rk_cst_mu_transp,mo_num,final_grid_points) 
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do m = 1, 3
+      do ipoint = 1, n_points_final_grid
+        xyz = final_grid_points(m,ipoint)
+        x_W_ki_bi_ortho_erf_rk_diag(ipoint,m,i) = mo_x_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,m,i,i) - xyz * mo_v_ki_bi_ortho_erf_rk_cst_mu_transp(ipoint,i,i)
+      enddo
+    enddo
+  enddo
+
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print*,'time to provide x_W_ki_bi_ortho_erf_rk_diag = ',wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f
new file mode 100644
index 00000000..4d21cb93
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijm.irp.f
@@ -0,0 +1,366 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_direct_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the direct terms
+  !
+  ! three_e_3_idx_direct_bi_ort(m,j,i) = <mji|-L|mji>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Only m >= j is computed explicitly; entries with m < j are copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, j, i, integral)
+        three_e_3_idx_direct_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): assumes the integral is symmetric under m <-> j; confirm
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_direct_bi_ort(m,j,i) = three_e_3_idx_direct_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the first cyclic permutation
+  !
+  ! three_e_3_idx_cycle_1_bi_ort(m,j,i) = <mji|-L|jim>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_cycle_1_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Only m >= j is computed explicitly; entries with m < j are copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, j, i, m, integral)
+        three_e_3_idx_cycle_1_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): assumes <mji|L|jim> = <jmi|L|mij>; not evident here, confirm
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_cycle_1_bi_ort(m,j,i) = three_e_3_idx_cycle_1_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the second cyclic permutation
+  !
+  ! three_e_3_idx_cycle_2_bi_ort(m,j,i) = <mji|-L|imj>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_cycle_2_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Only m >= j is computed explicitly; entries with m < j are copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, i, m, j, integral)
+        three_e_3_idx_cycle_2_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): assumes <mji|L|imj> = <jmi|L|ijm>; not evident here, confirm
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_cycle_2_bi_ort(m,j,i) = three_e_3_idx_cycle_2_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch23_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the permutation of particles 2 and 3
+  !
+  ! three_e_3_idx_exch23_bi_ort(m,j,i) = <mji|-L|jmi>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_exch23_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Only m >= j is computed explicitly; entries with m < j are copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch23_bi_ort = 0.d0
+  print*,'Providing the three_e_3_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, j, m, i, integral)
+        three_e_3_idx_exch23_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): assumes the integral is symmetric under m <-> j; confirm
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch23_bi_ort(m,j,i) = three_e_3_idx_exch23_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch13_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the permutation of particles 1 and 3
+  !
+  ! three_e_3_idx_exch13_bi_ort(m,j,i) = <mji|-L|ijm>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_exch13_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Only m >= j is computed explicitly; entries with m < j are copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer :: i,j,m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, i, j, m,integral)
+        three_e_3_idx_exch13_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): assumes <mji|L|ijm> = <jmi|L|imj>; not evident here, confirm
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch13_bi_ort(m,j,i) = three_e_3_idx_exch13_bi_ort(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the permutation of particles 1 and 2
+  !
+  ! three_e_3_idx_exch12_bi_ort(m,j,i) = <mji|-L|mij>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_exch12_bi_ort can be directly used to compute Slater rules with a + sign
+  ! Every (m,j,i) element is computed explicitly; no m <-> j symmetry is used here.
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_3_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Full loop over m, j and i: one call to give_integrals_3_body_bi_ort per element
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, i, j, integral)
+        three_e_3_idx_exch12_bi_ort(m,j,i) = -1.d0 * integral 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_3_idx_exch12_bi_ort_new, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator ON A BI-ORTHONORMAL BASIS for the permutation of particles 1 and 2
+  !
+  ! three_e_3_idx_exch12_bi_ort_new(m,j,i) = <mji|-L|mij>
+  !
+  ! Notice the -1 sign: in this way three_e_3_idx_exch12_bi_ort_new can be directly used to compute Slater rules with a + sign
+  ! Same integral as three_e_3_idx_exch12_bi_ort, but only m >= j is computed; m < j is copied from (j,m,i).
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0
+
+  three_e_3_idx_exch12_bi_ort_new = 0.d0
+  print *, ' Providing the three_e_3_idx_exch12_bi_ort_new ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! Compute the m >= j part only (one call to give_integrals_3_body_bi_ort per element)
+ !$OMP PARALLEL                 &
+ !$OMP DEFAULT (NONE)           &
+ !$OMP PRIVATE (i,j,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_3_idx_exch12_bi_ort_new)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        call give_integrals_3_body_bi_ort(m, j, i, m, i, j, integral)
+        three_e_3_idx_exch12_bi_ort_new(m,j,i) = -1.d0 * integral 
+    enddo
+   enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! Copy (j,m,i) into (m,j,i) for m <= j -- NOTE(review): confirm this reproduces three_e_3_idx_exch12_bi_ort for m < j
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, j
+        three_e_3_idx_exch12_bi_ort_new(m,j,i) = three_e_3_idx_exch12_bi_ort_new(j,m,i)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_3_idx_exch12_bi_ort_new', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f
new file mode 100644
index 00000000..853972f7
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijmk.irp.f
@@ -0,0 +1,284 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_direct_bi_ort(m,j,k,i) = <mjk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+ implicit none
+ integer          :: i, j, k, m
+ double precision :: integral, wall1, wall0
+
+  three_e_4_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, m, j, i, integral)
+          three_e_4_idx_direct_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = <mjk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_cycle_1_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, j, i, m, integral)
+          three_e_4_idx_cycle_1_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE SECOND CYCLIC PERMUTATION TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = <mjk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_cycle_2_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, i, m, j, integral)
+          three_e_4_idx_cycle_2_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 2 AND 3) TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch23_bi_ort(m,j,k,i) = <mjk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_exch23_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch23_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, j, m, i, integral)
+          three_e_4_idx_exch23_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 1 AND 3) TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = <mjk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_exch13_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, i, j, m, integral)
+          three_e_4_idx_exch13_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 1 AND 2) TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = <mjk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_4_idx_exch12_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0
+
+  three_e_4_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_4_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**4 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                   &
+ !$OMP DEFAULT (NONE)             &
+ !$OMP PRIVATE (i,j,k,m,integral) & 
+ !$OMP SHARED (mo_num,three_e_4_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          call give_integrals_3_body_bi_ort(m, j, k, m, i, j, integral)
+          three_e_4_idx_exch12_bi_ort(m,j,k,i) = -1.d0 * integral 
+        enddo
+      enddo
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_4_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f
new file mode 100644
index 00000000..bd5c4977
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ijmkl.irp.f
@@ -0,0 +1,296 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_direct_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_direct_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_direct_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, j, i, integral)
+            three_e_5_idx_direct_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_direct_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_1_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE FIRST CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = <mlk|-L|jim> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_cycle_1_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_1_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_1_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_1_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, i, m, integral)
+            three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_1_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_cycle_2_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE SECOND CYCLIC PERMUTATION TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = <mlk|-L|imj> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_cycle_2_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_cycle_2_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_cycle_2_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly; m (first array index) is the innermost loop so writes are contiguous
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_cycle_2_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, m, j, integral)
+            three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_cycle_2_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch23_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 2 AND 3) TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_exch23_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch23_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch23_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch23_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, j, m, i, integral)
+            three_e_5_idx_exch23_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch23_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 1 AND 3) TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = <mlk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_exch13_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch13_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch13_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch13_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, i, j, m, integral)
+            three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch13_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! Matrix elements of the -L three-body operator FOR THE EXCHANGE (PARTICLES 1 AND 2) TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = <mlk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! Notice the -1 sign: in this way three_e_5_idx_exch12_bi_ort can be directly used to compute Slater rules with a + sign
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0
+
+  three_e_5_idx_exch12_bi_ort = 0.d0
+  print *, ' Providing the three_e_5_idx_exch12_bi_ort ...'
+  call wall_time(wall0)
+
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  ! All mo_num**5 elements are computed explicitly, one call to give_integrals_3_body_bi_ort each
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_5_idx_exch12_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            call give_integrals_3_body_bi_ort(m, l, k, m, i, j, integral)
+            three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = -1.d0 * integral 
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_5_idx_exch12_bi_ort', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
new file mode 100644
index 00000000..48fa84f7
--- /dev/null
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -0,0 +1,207 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+ BEGIN_DOC
+! matrix element of the -L  three-body operator 
+! three_body_ints_bi_ort(n,l,k,m,j,i) = -1 * value returned by give_integrals_3_body_bi_ort(n,l,k,m,j,i), for all six indices
+! notice the -1 sign: in this way three_body_ints_bi_ort can be directly used to compute Slater rules :)
+ END_DOC
+
+ implicit none
+ integer          :: i, j, k, l, m, n
+ double precision :: integral, wall1, wall0
+ character*(128)  :: name_file 
+
+  three_body_ints_bi_ort = 0.d0
+  print *, ' Providing the three_body_ints_bi_ort ...'
+  call wall_time(wall0)
+  name_file = 'six_index_tensor'
+
+! if(read_three_body_ints_bi_ort)then
+!  call read_fcidump_3_tc(three_body_ints_bi_ort)
+! else
+!  if(read_three_body_ints_bi_ort)then
+!   print*,'Reading three_body_ints_bi_ort from disk ...'
+!   call read_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file)
+!  else
+  ! every entity read by give_integrals_3_body_bi_ort is built here, before the threads start calling it
+  !provide x_W_ki_bi_ortho_erf_rk 
+  provide mos_r_in_r_array_transp mos_l_in_r_array_transp
+  provide int2_grad1_u12_bimo_t final_weight_at_r_vector
+ !$OMP PARALLEL                       &
+ !$OMP DEFAULT (NONE)                 &
+ !$OMP PRIVATE (i,j,k,l,m,n,integral) & 
+ !$OMP SHARED (mo_num,three_body_ints_bi_ort)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = 1, mo_num
+        do k = 1, mo_num
+          do l = 1, mo_num
+            do n = 1, mo_num
+              call give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
+
+              three_body_ints_bi_ort(n,l,k,m,j,i) = -1.d0 * integral 
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+!  endif
+! endif
+
+  call wall_time(wall1)
+  print *, ' wall time for three_body_ints_bi_ort', wall1 - wall0
+! if(write_three_body_ints_bi_ort)then
+!  print*,'Writing three_body_ints_bi_ort on disk ...'
+!  call write_array_6_index_tensor(mo_num,three_body_ints_bi_ort,name_file)
+!  call ezfio_set_three_body_ints_bi_ort_io_three_body_ints_bi_ort("Read")
+! endif
+
+END_PROVIDER 
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  ! Weighted sum over the n_points_final_grid grid points (weights final_weight_at_r_vector) giving
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  ! Left MO values are taken at the bra indices (n,l,k), right MO values at the ket indices (m,j,i); result is returned in integral.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+    ! three terms, one per index pair (k,i), (l,j), (n,m): MO product of that pair times the 3-component dot product of the other two pairs
+    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        & 
+              * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,l,j) &
+                + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,l,j) &
+                + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,l,j) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        & 
+              * ( int2_grad1_u12_bimo_t(ipoint,1,n,m) * int2_grad1_u12_bimo_t(ipoint,1,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,2,n,m) * int2_grad1_u12_bimo_t(ipoint,2,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,3,n,m) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m)        &
+              * ( int2_grad1_u12_bimo_t(ipoint,1,l,j) * int2_grad1_u12_bimo_t(ipoint,1,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,2,l,j) * int2_grad1_u12_bimo_t(ipoint,2,k,i) &
+                + int2_grad1_u12_bimo_t(ipoint,3,l,j) * int2_grad1_u12_bimo_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  ! Weighted sum over the n_points_final_grid grid points (weights final_weight_at_r_vector) giving
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
+  ! Active code reads int2_grad1_u12_bimo_transp(a,b,component,ipoint); two earlier variants are kept commented out in the loop.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j)  )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,n,m) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
+!              * ( x_W_ki_bi_ortho_erf_rk(ipoint,1,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,1,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,2,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,2,k,i)  &
+!                + x_W_ki_bi_ortho_erf_rk(ipoint,3,l,j) * x_W_ki_bi_ortho_erf_rk(ipoint,3,k,i)  )
+
+!    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i) & 
+!              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,l,j,ipoint)        &
+!                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,l,j,ipoint)        &
+!                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,l,j,ipoint)        )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j) & 
+!              * ( int2_grad1_u12_bimo(1,n,m,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(2,n,m,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(3,n,m,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
+!    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m) &
+!              * ( int2_grad1_u12_bimo(1,l,j,ipoint) * int2_grad1_u12_bimo(1,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(2,l,j,ipoint) * int2_grad1_u12_bimo(2,k,i,ipoint)        &
+!                + int2_grad1_u12_bimo(3,l,j,ipoint) * int2_grad1_u12_bimo(3,k,i,ipoint)        )
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+    ! active variant: one term per index pair (k,i), (l,j), (n,m), each times the 3-component dot product of the other two pairs
+    integral += weight * mos_l_in_r_array_transp(ipoint,k) * mos_r_in_r_array_transp(ipoint,i)        & 
+              * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(l,j,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(l,j,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(l,j,3,ipoint) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,l) * mos_r_in_r_array_transp(ipoint,j)        & 
+              * ( int2_grad1_u12_bimo_transp(n,m,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(n,m,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) )
+    integral += weight * mos_l_in_r_array_transp(ipoint,n) * mos_r_in_r_array_transp(ipoint,m)        &
+              * ( int2_grad1_u12_bimo_transp(l,j,1,ipoint) * int2_grad1_u12_bimo_transp(k,i,1,ipoint) &
+                + int2_grad1_u12_bimo_transp(l,j,2,ipoint) * int2_grad1_u12_bimo_transp(k,i,2,ipoint) &
+                + int2_grad1_u12_bimo_transp(l,j,3,ipoint) * int2_grad1_u12_bimo_transp(k,i,3,ipoint) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_old
+
+! ---
+
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  ! Weighted sum over the n_points_final_grid grid points (weights final_weight_at_r_vector) giving
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ATOMIC ORBITALS 
+  ! The same AO values (aos_in_r_array_transp) are used for bra and ket indices, together with int2_grad1_u12_ao_t.
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+    ! three terms, one per index pair (k,i), (l,j), (n,m): AO product of that pair times the 3-component dot product of the other two pairs
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f
new file mode 100644
index 00000000..e74c6d2a
--- /dev/null
+++ b/src/bi_ort_ints/total_twoe_pot.irp.f
@@ -0,0 +1,250 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! ao_two_e_tc_tot(k,i,l,j) = (ki|V^TC(r_12)|lj) = <lk| V^TC(r_12) |ji> where V^TC(r_12) is the total TC operator 
+  !
+  ! including both hermitian and non hermitian parts. THIS IS IN CHEMIST NOTATION. 
+  !
+  ! WARNING :: non hermitian ! acts on "the right functions" (i,j)
+  ! j1b_type = 3 : plain copy of ao_tc_int_chemist ; otherwise : ao_tc_sym_two_e_pot (from the map) + ao_non_hermit_term_chemist
+  END_DOC
+  implicit none
+  integer                    :: i, j, k, l
+  double precision           :: integral_sym, integral_nsym
+  double precision, external :: get_ao_tc_sym_two_e_pot
+
+  provide j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+            ao_two_e_tc_tot(k,i,l,j) = ao_tc_int_chemist(k,i,l,j)
+            !write(222,*) ao_two_e_tc_tot(k,i,l,j) 
+          enddo
+        enddo
+      enddo
+    enddo
+
+  else
+
+    PROVIDE ao_tc_sym_two_e_pot_in_map
+
+    do j = 1, ao_num
+      do l = 1, ao_num
+        do i = 1, ao_num
+          do k = 1, ao_num
+
+            integral_sym  = get_ao_tc_sym_two_e_pot(i, j, k, l, ao_tc_sym_two_e_pot_map)
+            ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis
+            integral_nsym = ao_non_hermit_term_chemist(k,i,l,j)
+
+            !print *, ' sym     integ = ', integral_sym
+            !print *, ' non-sym integ = ', integral_nsym
+
+            ao_two_e_tc_tot(k,i,l,j) = integral_sym + integral_nsym 
+            !write(111,*) ao_two_e_tc_tot(k,i,l,j) 
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
+double precision function bi_ortho_mo_ints(l, k, j, i)
+
+  BEGIN_DOC
+  ! bi_ortho_mo_ints(l, k, j, i) : note the dummy-argument order; l,k index left MOs and j,i index right MOs
+  ! <mo^L_k mo^L_l | V^TC(r_12) | mo^R_i mo^R_j>
+  !
+  ! WARNING :: very naive, super slow, only used to DEBUG.
+  !
+  END_DOC
+
+  implicit none
+  integer, intent(in) :: i, j, k, l
+  integer             :: m, n, p, q
+  ! direct four-fold sum over AO indices of ao_two_e_tc_tot times two left and two right MO coefficients
+  bi_ortho_mo_ints = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          !                                   p1h1p2h2   l1                  l2              r1               r2
+          bi_ortho_mo_ints += ao_two_e_tc_tot(n,q,m,p) * mo_l_coef(m,l) * mo_l_coef(n,k) * mo_r_coef(p,j) * mo_r_coef(q,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+end function bi_ortho_mo_ints
+
+! ---
+
+! TODO :: transform into DGEMM
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e_chemist, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_two_e_chemist(k,i,l,j) = <k l|V(r_12)|i j> where i,j are right MOs and k,l are left MOs
+  ! Built from ao_two_e_tc_tot by four successive one-index transformations (left, right, left, right coefficients).
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l, m, n, p, q
+  double precision, allocatable :: mo_tmp_1(:,:,:,:), mo_tmp_2(:,:,:,:)
+  ! step 1: first index, left coefficients (mo_l_coef_transp)
+  allocate(mo_tmp_1(mo_num,ao_num,ao_num,ao_num))
+  mo_tmp_1 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do q = 1, ao_num
+          do k = 1, mo_num
+            !       (k n|p m)    = sum_q c_qk * (q n|p m)
+            mo_tmp_1(k,n,p,m) += mo_l_coef_transp(k,q) * ao_two_e_tc_tot(q,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  ! step 2: second index, right coefficients (mo_r_coef_transp)
+  allocate(mo_tmp_2(mo_num,mo_num,ao_num,ao_num))
+  mo_tmp_2 = 0.d0
+
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do n = 1, ao_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            !       (k i|p m) = sum_n c_ni * (k n|p m)
+            mo_tmp_2(k,i,p,m) += mo_r_coef_transp(i,n) * mo_tmp_1(k,n,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+  ! step 3: third index, left coefficients: (k i|l m) = sum_p c_pl * (k i|p m); mo_tmp_1 is reallocated with a new shape
+  allocate(mo_tmp_1(mo_num,mo_num,mo_num,ao_num))
+  mo_tmp_1 = 0.d0
+  do m = 1, ao_num
+    do p = 1, ao_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_tmp_1(k,i,l,m) += mo_l_coef_transp(l,p) * mo_tmp_2(k,i,p,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_2)
+  ! step 4: fourth index, right coefficients: (k i|l j) = sum_m c_mj * (k i|l m)
+  mo_bi_ortho_tc_two_e_chemist = 0.d0 
+  do m = 1, ao_num
+    do j = 1, mo_num
+      do l = 1, mo_num
+        do i = 1, mo_num
+          do k = 1, mo_num
+            mo_bi_ortho_tc_two_e_chemist(k,i,l,j) += mo_r_coef_transp(j,m) * mo_tmp_1(k,i,l,m)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+  deallocate(mo_tmp_1)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, mo_bi_ortho_tc_two_e, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! mo_bi_ortho_tc_two_e(k,l,i,j) = <k l| V(r_12) |i j> where i,j are right MOs and k,l are left MOs
+  !
+  ! the potential V(r_12) contains ALL TWO-E CONTRIBUTION OF THE TC-HAMILTONIAN
+  ! Pure index permutation of mo_bi_ortho_tc_two_e_chemist: element (k,i,l,j) is stored at (k,l,i,j); no arithmetic is done here.
+  END_DOC
+
+  implicit none
+  integer :: i, j, k, l
+
+  do j = 1, mo_num
+    do i = 1, mo_num
+      do l = 1, mo_num
+        do k = 1, mo_num
+           !              < k l | V12 | i j >                          (k i|l j)
+           mo_bi_ortho_tc_two_e(k,l,i,j) = mo_bi_ortho_tc_two_e_chemist(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+END_PROVIDER 
+
+! ---
+
+
+ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_exchange, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num) ]
+  implicit none
+  BEGIN_DOC
+  ! mo_bi_ortho_tc_two_e_jj(i,j) = J_ij = <ji|W-K|ji>            (read from mo_bi_ortho_tc_two_e(j,i,j,i))
+  ! mo_bi_ortho_tc_two_e_jj_exchange(i,j) = K_ij = <ij|W-K|ji>   (read from mo_bi_ortho_tc_two_e(i,j,j,i))
+  ! mo_bi_ortho_tc_two_e_jj_anti(i,j) = J_ij - K_ij
+  END_DOC
+
+  integer                        :: i,j
+
+  mo_bi_ortho_tc_two_e_jj = 0.d0
+  mo_bi_ortho_tc_two_e_jj_exchange = 0.d0
+  mo_bi_ortho_tc_two_e_jj_anti = 0.d0
+
+  do i=1,mo_num
+    do j=1,mo_num
+      mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i)
+      mo_bi_ortho_tc_two_e_jj_exchange(i,j) = mo_bi_ortho_tc_two_e(i,j,j,i)
+      mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j)
+    enddo
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)]
+&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! tc_2e_3idx_coulomb_integrals(j,k,i)  = <jk|ji> 
+ !
+ ! tc_2e_3idx_exchange_integrals(j,k,i) = <kj|ji> 
+ !
+ ! Both are three-index slices copied from mo_bi_ortho_tc_two_e: elements (j,k,j,i) and (k,j,j,i) respectively.
+ END_DOC
+ integer :: i,j,k
+
+ do i = 1, mo_num
+  do k = 1, mo_num
+   do j = 1, mo_num
+     tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) 
+     tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
diff --git a/src/tc_scf/test_Ne.sh b/src/tc_scf/test_Ne.sh
index 27ea73c2..a6422931 100755
--- a/src/tc_scf/test_Ne.sh
+++ b/src/tc_scf/test_Ne.sh
@@ -2,12 +2,13 @@ QP_ROOT=/home/eginer/new_qp2/qp2
 source ${QP_ROOT}/quantum_package.rc
   echo Ne > Ne.xyz
   echo $QP_ROOT
-  qp create_ezfio -b cc-pcvdz Ne.xyz 
+  qp create_ezfio -b cc-pcvdz Ne.xyz -o Ne_tc_scf
   qp run scf 
   qp set tc_keywords bi_ortho True 
   qp set ao_two_e_erf_ints mu_erf 0.87 
   qp set tc_keywords j1b_pen [1.5]
   qp set tc_keywords j1b_type 3 
-  qp run tc_scf | tee Ne.ezfio.tc_scf.out 
-  grep "TC energy =" Ne.ezfio.tc_scf.out | tail -1 
+  # the log is named after the EZFIO directory; grep must read the same file that tee writes
+  qp run tc_scf | tee "${EZFIO_FILE}.tc_scf.out"
+  grep "TC energy =" "${EZFIO_FILE}.tc_scf.out" | tail -1
   eref=-128.552134

From d6ed501c91b8191f1869164e54b7e656d207c0ea Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 7 Feb 2023 13:43:37 +0100
Subject: [PATCH 7/7] added a proper test for tc_scf

---
 src/tc_scf/11.tc_scf.bats | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 src/tc_scf/11.tc_scf.bats

diff --git a/src/tc_scf/11.tc_scf.bats b/src/tc_scf/11.tc_scf.bats
new file mode 100644
index 00000000..a5171902
--- /dev/null
+++ b/src/tc_scf/11.tc_scf.bats
@@ -0,0 +1,27 @@
+#!/usr/bin/env bats
+# Regression test for tc_scf: Ne / cc-pcvdz with bi-orthogonal MOs, checked against a stored reference energy.
+source "$QP_ROOT/tests/bats/common.bats.sh"
+source "$QP_ROOT/quantum_package.rc"
+
+# Creates the Ne_tc_scf EZFIO directory, runs scf then tc_scf, and compares tc_scf bitc_energy with eref (threshold 1e-6).
+function run_Ne() {
+  local eref energy
+  rm -rf Ne_tc_scf
+  echo Ne > Ne.xyz
+  qp create_ezfio -b cc-pcvdz Ne.xyz -o Ne_tc_scf
+  qp run scf 
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+  qp set ao_two_e_erf_ints mu_erf 0.87 
+  qp set tc_keywords j1b_pen '[1.5]'
+  qp set tc_keywords j1b_type 3 
+  qp run tc_scf | tee "${EZFIO_FILE}.tc_scf.out"
+  eref=-128.552134
+  energy="$(qp get tc_scf bitc_energy)"
+  eq "$energy" "$eref" 1e-6
+}
+
+@test "Ne" {
+ run_Ne 
+}
+