diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f index 4fc9ad6b..3d7fbe50 100644 --- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f @@ -233,9 +233,6 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) double precision :: NAI_pol_mult_erf ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -260,11 +257,11 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) ! First term = (x-Ax)**(ax+1) - integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) ints(m) += integral * coef ! Second term = Ax * (x-Ax)**(ax) - integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) ints(m) += A_center(m) * integral * coef enddo @@ -274,7 +271,8 @@ subroutine NAI_pol_x_mult_erf_ao(i_ao, j_ao, mu_in, C_center, ints) end subroutine NAI_pol_x_mult_erf_ao ! --- -subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + +subroutine NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! @@ -292,20 +290,16 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points + double precision, intent(in) :: mu_in, C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in integer :: power_xA(3), m, ipoint double precision :: A_center(3), B_center(3), alpha, beta, coef double precision, allocatable :: integral(:) - double precision :: NAI_pol_mult_erf - ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao).lt.1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 num_A = ao_nucl(i_ao) power_A(1:3) = ao_power(i_ao,1:3) @@ -317,13 +311,15 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alpha = ao_expo_ordered_transp(i,i_ao) do m = 1, 3 - power_xA = power_A ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A power_xA(m) += 1 do j = 1, ao_prim_num(j_ao) @@ -331,25 +327,494 @@ subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_xA, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! 
Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, integral, n_points) - do ipoint=1,n_points + call NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += A_center(m) * integral(ipoint) * coef enddo enddo enddo enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_v0 + +! --- + +subroutine NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: mu_in, C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, LD_integral + integer :: power_xA(3), m, ipoint, n_points_m + double precision :: A_center(3), B_center(3), alpha, beta, coef + double precision, allocatable :: integral(:) + + ints(1:LD_ints,1:3) = 0.d0 + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_A + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + call NAI_pol_mult_erf_v( A_center, B_center, power_xA, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_v( A_center, B_center, power_A, power_B, alpha, beta & + , C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += A_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + deallocate(integral) end subroutine NAI_pol_x_mult_erf_ao_v ! --- + +double precision function NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! 
+ END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_x = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(1) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_x += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_x += A_center(1) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_x + +! --- + +double precision function NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_y = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(2) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_y += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_y += A_center(2) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_y + +! --- + +double precision function NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! 
+ END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: mu_in, C_center(3) + + integer :: i, j, num_A, num_B, power_A(3), power_B(3), n_pt_in, power_xA(3) + double precision :: A_center(3), B_center(3), integral, alpha, beta, coef + + double precision :: NAI_pol_mult_erf + + NAI_pol_x_mult_erf_ao_z = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) return + + num_A = ao_nucl(i_ao) + power_A(1:3) = ao_power(i_ao,1:3) + A_center(1:3) = nucl_coord(num_A,1:3) + num_B = ao_nucl(j_ao) + power_B(1:3) = ao_power(j_ao,1:3) + B_center(1:3) = nucl_coord(num_B,1:3) + + power_xA = power_A + power_xA(3) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alpha = ao_expo_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + beta = ao_expo_ordered_transp(j,j_ao) + coef = ao_coef_normalized_ordered_transp(j,j_ao) * ao_coef_normalized_ordered_transp(i,i_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf(A_center, B_center, power_xA, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_z += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in) + NAI_pol_x_mult_erf_ao_z += A_center(3) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_z + +! --- + +double precision function NAI_pol_x_mult_erf_ao_with1s_x(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_x + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_x = NAI_pol_x_mult_erf_ao_x(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_x = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(1) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_x += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_x += Ai_center(1) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_x + +! 
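! Note (clarifying sketch, not part of the patch hunks themselves): the "First term" /
! "Second term" pattern used in NAI_pol_x_mult_erf_ao_with1s_x above, and in the _y and
! _z variants that follow, is the polynomial shift identity
!
!   $x \, (x - A_x)^{a_x} = (x - A_x)^{a_x+1} + A_x \, (x - A_x)^{a_x}$
!
! so each Cartesian-moment integral reduces to two standard calls: one with the power on
! the chosen direction incremented (power_xA) and one with the unshifted power_A, the
! latter scaled by the corresponding coordinate of the bra center (Ai_center).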
--- + +double precision function NAI_pol_x_mult_erf_ao_with1s_y(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_y + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_y = NAI_pol_x_mult_erf_ao_y(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_y = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(2) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_y += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_y += Ai_center(2) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_y + +! --- + +double precision function NAI_pol_x_mult_erf_ao_with1s_z(i_ao, j_ao, beta, B_center, mu_in, C_center) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao + double precision, intent(in) :: beta, B_center(3), mu_in, C_center(3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3) + double precision :: Ai_center(3), Aj_center(3), integral, alphai, alphaj, coef, coefi + + double precision, external :: NAI_pol_mult_erf_with1s + double precision, external :: NAI_pol_x_mult_erf_ao_z + + ASSERT(beta .ge. 0.d0) + if(beta .lt. 1d-10) then + NAI_pol_x_mult_erf_ao_with1s_z = NAI_pol_x_mult_erf_ao_z(i_ao, j_ao, mu_in, C_center) + return + endif + + NAI_pol_x_mult_erf_ao_with1s_z = 0.d0 + if(ao_overlap_abs(j_ao,i_ao) .lt. 
1.d-12) then + return + endif + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + power_xA = power_Ai + power_xA(3) += 1 + + n_pt_in = n_pt_max_integrals + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_z += integral * coef + + ! Second term = Ax * (x-Ax)**(ax) + integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & + , beta, B_center, C_center, n_pt_in, mu_in ) + NAI_pol_x_mult_erf_ao_with1s_z += Ai_center(3) * integral * coef + + enddo + enddo + +end function NAI_pol_x_mult_erf_ao_with1s_z + +! --- + subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_center, ints) BEGIN_DOC @@ -384,9 +849,6 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen endif ints = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -411,13 +873,11 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += integral * coef ! Second term = Ax * (x-Ax)**(ax) - integral = NAI_pol_mult_erf_with1s( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj & - , beta, B_center, C_center, n_pt_in, mu_in ) + integral = NAI_pol_mult_erf_with1s(Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta, B_center, C_center, n_pt_in, mu_in) ints(m) += Ai_center(m) * integral * coef enddo @@ -426,9 +886,9 @@ subroutine NAI_pol_x_mult_erf_ao_with1s(i_ao, j_ao, beta, B_center, mu_in, C_cen end subroutine NAI_pol_x_mult_erf_ao_with1s -!-- +! --- -subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_center, ints, n_points) +subroutine NAI_pol_x_mult_erf_ao_with1s_v0(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) BEGIN_DOC ! 
@@ -446,25 +906,23 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c implicit none - integer, intent(in) :: i_ao, j_ao, n_points - double precision, intent(in) :: beta, B_center(n_points,3), mu_in, C_center(n_points,3) - double precision, intent(out) :: ints(n_points,3) + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3), C_center(LD_C,3) + double precision, intent(out) :: ints(LD_ints,3) - integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m - double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi - integer :: ipoint + integer :: ipoint double precision, allocatable :: integral(:) if(beta .lt. 1d-10) then - call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, ints, n_points) + call NAI_pol_x_mult_erf_ao_v0(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) return endif - ints(:,:) = 0.d0 - if(ao_overlap_abs(j_ao,i_ao) .lt. 1.d-12) then - return - endif + ints(1:LD_ints,1:3) = 0.d0 power_Ai(1:3) = ao_power(i_ao,1:3) power_Aj(1:3) = ao_power(j_ao,1:3) @@ -475,6 +933,8 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c n_pt_in = n_pt_max_integrals allocate(integral(n_points)) + integral = 0.d0 + do i = 1, ao_prim_num(i_ao) alphai = ao_expo_ordered_transp (i,i_ao) coefi = ao_coef_normalized_ordered_transp(i,i_ao) @@ -490,15 +950,17 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) ! First term = (x-Ax)**(ax+1) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) + do ipoint = 1, n_points ints(ipoint,m) += integral(ipoint) * coef enddo ! Second term = Ax * (x-Ax)**(ax) - call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, & - alphaj, beta, B_center, C_center, n_pt_in, mu_in, integral, n_points) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3), LD_B, C_center(1:LD_C,1:3), LD_C, n_pt_in, mu_in, integral(1:n_points), n_points, n_points) do ipoint = 1, n_points ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef enddo @@ -506,10 +968,100 @@ subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, mu_in, C_c enddo enddo enddo + deallocate(integral) -end subroutine NAI_pol_x_mult_erf_ao_with1s +end subroutine NAI_pol_x_mult_erf_ao_with1s_v0 +! --- + +subroutine NAI_pol_x_mult_erf_ao_with1s_v(i_ao, j_ao, beta, B_center, LD_B, mu_in, C_center, LD_C, ints, LD_ints, n_points) + + BEGIN_DOC + ! + ! Computes the following integral : + ! + ! $\int_{-\infty}^{infty} dr x * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! $\int_{-\infty}^{infty} dr y * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + ! 
$\int_{-\infty}^{infty} dr z * \chi_i(r) \chi_j(r) e^{-\beta (r - B_center)^2} \frac{\erf(\mu | r - R_C | )}{ | r - R_C | }$. + ! + END_DOC + + include 'utils/constants.include.F' + + implicit none + + integer, intent(in) :: i_ao, j_ao, LD_B, LD_C, LD_ints, n_points(3) + double precision, intent(in) :: beta, mu_in + double precision, intent(in) :: B_center(LD_B,3,3), C_center(LD_C,3,3) + double precision, intent(out) :: ints(LD_ints,3) + + integer :: i, j, power_Ai(3), power_Aj(3), n_pt_in, power_xA(3), m + double precision :: Ai_center(3), Aj_center(3), alphai, alphaj, coef, coefi + + integer :: ipoint, n_points_m, LD_integral + double precision, allocatable :: integral(:) + + if(beta .lt. 1d-10) then + print *, 'small beta', i_ao, j_ao + call NAI_pol_x_mult_erf_ao_v(i_ao, j_ao, mu_in, C_center, LD_C, ints, LD_ints, n_points) + return + endif + + ints(1:LD_ints,1:3) = 0.d0 + + power_Ai(1:3) = ao_power(i_ao,1:3) + power_Aj(1:3) = ao_power(j_ao,1:3) + + Ai_center(1:3) = nucl_coord(ao_nucl(i_ao),1:3) + Aj_center(1:3) = nucl_coord(ao_nucl(j_ao),1:3) + + n_pt_in = n_pt_max_integrals + + LD_integral = max(max(n_points(1), n_points(2)), n_points(3)) + allocate(integral(LD_integral)) + integral = 0.d0 + + do i = 1, ao_prim_num(i_ao) + alphai = ao_expo_ordered_transp (i,i_ao) + coefi = ao_coef_normalized_ordered_transp(i,i_ao) + + do m = 1, 3 + n_points_m = n_points(m) + + ! x * phi_i(r) = x * (x-Ax)**ax = (x-Ax)**(ax+1) + Ax * (x-Ax)**ax + power_xA = power_Ai + power_xA(m) += 1 + + do j = 1, ao_prim_num(j_ao) + alphaj = ao_expo_ordered_transp (j,j_ao) + coef = coefi * ao_coef_normalized_ordered_transp(j,j_ao) + + ! First term = (x-Ax)**(ax+1) + + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_xA, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + + do ipoint = 1, n_points_m + ints(ipoint,m) += integral(ipoint) * coef + enddo + + ! Second term = Ax * (x-Ax)**(ax) + call NAI_pol_mult_erf_with1s_v( Ai_center, Aj_center, power_Ai, power_Aj, alphai, alphaj, beta & + , B_center(1:LD_B,1:3,m), LD_B, C_center(1:LD_C,1:3,m), LD_C, n_pt_in, mu_in, integral(1:LD_integral), LD_integral, n_points_m) + do ipoint = 1, n_points_m + ints(ipoint,m) += Ai_center(m) * integral(ipoint) * coef + enddo + + enddo + enddo + enddo + + deallocate(integral) + +end subroutine NAI_pol_x_mult_erf_ao_with1s_v ! --- diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f index facb6264..213a63e4 100644 --- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f +++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f @@ -1,4 +1,7 @@ +! --- + subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) + implicit none BEGIN_DOC ! gauss_ints(m) = \int dr AO_i(r) AO_j(r) x/y/z e^{-delta |r-D_center|^2} @@ -32,6 +35,7 @@ subroutine overlap_gauss_xyz_r12_ao(D_center,delta,i,j,gauss_ints) enddo enddo enddo + end @@ -152,24 +156,26 @@ end function overlap_gauss_r12_ao ! -- -subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) BEGIN_DOC + ! ! \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} + ! + ! n_points: nb of integrals <= min(LD_D, LD_resv) + ! 
END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) + integer, intent(in) :: i, j, LD_D, LD_resv, n_points + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) - integer :: power_A(3), power_B(3), l, k - double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 + integer :: ipoint + integer :: power_A(3), power_B(3), l, k + double precision :: A_center(3), B_center(3), alpha, beta, coef, coef1 double precision, allocatable :: analytical_j(:) - double precision, external :: overlap_gauss_r12 - integer :: ipoint - resv(:) = 0.d0 if(ao_overlap_abs(j,i).lt.1.d-12) then return @@ -182,6 +188,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) B_center(1:3) = nucl_coord(ao_nucl(j),1:3) allocate(analytical_j(n_points)) + do l = 1, ao_prim_num(i) alpha = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) @@ -192,15 +199,18 @@ subroutine overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) if(dabs(coef) .lt. 1d-12) cycle - call overlap_gauss_r12_v(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points) - do ipoint=1, n_points - resv(ipoint) = resv(ipoint) + coef*analytical_j(ipoint) + call overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, analytical_j, n_points, n_points) + + do ipoint = 1, n_points + resv(ipoint) = resv(ipoint) + coef * analytical_j(ipoint) enddo + enddo enddo + deallocate(analytical_j) -end +end subroutine overlap_gauss_r12_ao_v ! --- @@ -274,7 +284,8 @@ end function overlap_gauss_r12_ao_with1s ! --- -subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, resv, n_points) +subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + BEGIN_DOC ! ! \int dr AO_i(r) AO_j(r) e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} @@ -283,18 +294,16 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, END_DOC implicit none - integer, intent(in) :: i, j, n_points - double precision, intent(in) :: B_center(3), beta, D_center(n_points,3), delta - double precision, intent(out) :: resv(n_points) - - integer :: power_A1(3), power_A2(3), l, k - double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 - double precision :: coef12, coef12f - double precision :: gama, gama_inv - double precision :: bg, dg, bdg - - integer :: ipoint + integer, intent(in) :: i, j, n_points, LD_D, LD_resv + double precision, intent(in) :: B_center(3), beta, D_center(LD_D,3), delta + double precision, intent(out) :: resv(LD_resv) + integer :: ipoint + integer :: power_A1(3), power_A2(3), l, k + double precision :: A1_center(3), A2_center(3), alpha1, alpha2, coef1 + double precision :: coef12, coef12f + double precision :: gama, gama_inv + double precision :: bg, dg, bdg double precision, allocatable :: fact_g(:), G_center(:,:), analytical_j(:) if(ao_overlap_abs(j,i) .lt. 1.d-12) then @@ -304,7 +313,9 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ASSERT(beta .gt. 0.d0) if(beta .lt. 
1d-10) then - call overlap_gauss_r12_ao_v(D_center, delta, i, j, resv, n_points) + + call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points) + return endif @@ -312,8 +323,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, ! e^{-beta |r-B_center^2|} e^{-delta |r-D_center|^2} = fact_g e^{-gama |r - G|^2} - gama = beta + delta - gama_inv = 1.d0 / gama + gama = beta + delta + gama_inv = 1.d0 / gama power_A1(1:3) = ao_power(i,1:3) power_A2(1:3) = ao_power(j,1:3) @@ -323,8 +334,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) ) - bg = beta * gama_inv - dg = delta * gama_inv + bg = beta * gama_inv + dg = delta * gama_inv bdg = bg * delta do ipoint=1,n_points G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1) @@ -343,10 +354,8 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, enddo - ! --- - do l = 1, ao_prim_num(i) - alpha1 = ao_expo_ordered_transp (l,i) + alpha1 = ao_expo_ordered_transp (l,i) coef1 = ao_coef_normalized_ordered_transp(l,i) do k = 1, ao_prim_num(j) @@ -354,19 +363,19 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, delta, i, j, coef12 = coef1 * ao_coef_normalized_ordered_transp(k,j) if(dabs(coef12) .lt. 1d-12) cycle - call overlap_gauss_r12_v(G_center, gama, A1_center,& - A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points) + call overlap_gauss_r12_v(G_center, n_points, gama, A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, analytical_j, n_points, n_points) - do ipoint=1,n_points + do ipoint = 1, n_points coef12f = coef12 * fact_g(ipoint) resv(ipoint) += coef12f * analytical_j(ipoint) end do enddo enddo - deallocate (fact_g, G_center, analytical_j ) + deallocate(fact_g, G_center, analytical_j) -end +end subroutine overlap_gauss_r12_ao_with1s_v +! 
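! Note (worked form of the Gaussian product used in overlap_gauss_r12_ao_with1s_v above,
! written as a sketch with the routine's own variable names):
!
!   $e^{-\beta |r-B|^2} \, e^{-\delta |r-D|^2}
!      = e^{-\frac{\beta \delta}{\beta+\delta} |B-D|^2} \, e^{-(\beta+\delta) |r-G|^2}$,
!   with $G = \frac{\beta B + \delta D}{\beta + \delta}$
!
! i.e. gama = beta + delta, G_center = bg*B_center + dg*D_center, and the prefactor
! fact_g is $\exp(-\mathrm{bdg} \cdot \mathrm{dist})$, where bg = beta/gama,
! dg = delta/gama, bdg = beta*delta/gama and dist = |B_center - D_center|^2.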
--- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f index cdc86456..b7fe234f 100644 --- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f @@ -11,65 +11,72 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n implicit none integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit + double precision :: r(3), int_fit, expo_fit, coef_fit double precision :: coef, beta, B_center(3) double precision :: tmp double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) + double precision, external :: overlap_gauss_r12_ao double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 + int2_grad1u2_grad2u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2,& - !$OMP ao_overlap_abs) + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + do i = 1, ao_num + do j = i, ao_num - if(ao_overlap_abs(j,i) .lt. 1.d-12) then - cycle - endif + tmp = 0.d0 + do i_fit = 1, ng_fit_jast - do i_1s = 1, List_all_comb_b3_size + expo_fit = expo_gauss_1_erf_x_2(i_fit) + coef_fit = coef_gauss_1_erf_x_2(i_fit) - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + ! --- - do i_fit = 1, n_max_fit_slat + int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) + tmp += -0.25d0 * coef_fit * int_fit + if(dabs(int_fit) .lt. 1d-10) cycle - expo_fit = expo_gauss_1_erf_x_2(i_fit) - coef_fit = -0.25d0 * coef_gauss_1_erf_x_2(i_fit) * coef + ! --- - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + do i_1s = 2, List_all_comb_b3_size - do ipoint = 1, n_points_final_grid - int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) - enddo + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) - enddo + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) - enddo - enddo - enddo + tmp += -0.25d0 * coef * coef_fit * int_fit + enddo + + ! 
--- + + enddo + + int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp + enddo + enddo + enddo !$OMP END DO - deallocate(int_fit_v) !$OMP END PARALLEL do ipoint = 1, n_points_final_grid @@ -83,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n call wall_time(wall1) print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -96,60 +103,73 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3), tmp - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit, expo_fit, coef_fit + double precision :: coef, beta, B_center(3), tmp + double precision :: wall0, wall1 - double precision, external :: overlap_gauss_r12_ao_with1s + double precision, external :: overlap_gauss_r12_ao + double precision, external :: overlap_gauss_r12_ao_with1s - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - int2_u2_j1b2(:,:,:) = 0.d0 + int2_u2_j1b2 = 0.d0 - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, & - !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u2_j1b2) - allocate(int_fit_v(n_points_final_grid)) - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u2_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - do i_1s = 1, List_all_comb_b3_size + do i = 1, ao_num + do j = i, ao_num - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - - do i_fit = 1, n_max_fit_slat + tmp = 0.d0 + do i_fit = 1, ng_fit_jast expo_fit = expo_gauss_j_mu_x_2(i_fit) - coef_fit = coef_gauss_j_mu_x_2(i_fit) * coef + coef_fit = coef_gauss_j_mu_x_2(i_fit) - call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, & - expo_fit, i, j, int_fit_v, n_points_final_grid) + ! --- - do ipoint = 1, n_points_final_grid - int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) + int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j) + tmp += coef_fit * int_fit + if(dabs(int_fit) .lt. 1d-10) cycle + + ! 
--- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + + tmp += coef * coef_fit * int_fit enddo + ! --- + enddo + int2_u2_j1b2(j,i,ipoint) = tmp enddo enddo enddo - !$OMP END DO - deallocate(int_fit_v) - !$OMP END PARALLEL + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -162,7 +182,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final call wall_time(wall1) print*, ' wall time for int2_u2_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- @@ -175,95 +195,97 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p END_DOC implicit none - integer :: i, j, ipoint, i_1s, i_fit - double precision :: r(3), expo_fit, coef_fit - double precision :: coef, beta, B_center(3) - double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s, coef_tmp - double precision :: tmp_x, tmp_y, tmp_z - double precision :: wall0, wall1 - double precision, allocatable :: int_fit_v(:,:), dist(:), centr_1s(:,:) + integer :: i, j, ipoint, i_1s, i_fit + double precision :: r(3), int_fit(3), expo_fit, coef_fit + double precision :: coef, beta, B_center(3), dist + double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp + double precision :: tmp_x, tmp_y, tmp_z + double precision :: wall0, wall1 - provide mu_erf final_grid_points_transp j1b_pen + provide mu_erf final_grid_points j1b_pen call wall_time(wall0) - allocate(dist(n_points_final_grid), centr_1s(n_points_final_grid,3)) + int2_u_grad1u_x_j1b2 = 0.d0 + + !$OMP PARALLEL DEFAULT (NONE) & + !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & + !$OMP coef_fit, expo_fit, int_fit, alpha_1s, dist, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & + !$OMP tmp_x, tmp_y, tmp_z) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & + !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) + !$OMP DO + do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + r(1) = final_grid_points(1,ipoint) + r(2) = final_grid_points(2,ipoint) + r(3) = final_grid_points(3,ipoint) - dist(ipoint) = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) - enddo + do i = 1, ao_num + do j = i, ao_num - int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 + tmp_x = 0.d0 + tmp_y = 0.d0 + tmp_z = 0.d0 + do i_fit = 1, ng_fit_jast - !$OMP PARALLEL DEFAULT (NONE) & - !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& - !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp, & - !$OMP tmp_x, tmp_y, tmp_z) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& - !$OMP final_grid_points_transp, n_max_fit_slat, dist, & - !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & - !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) - allocate(int_fit_v(n_points_final_grid,3)) + expo_fit = 
expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) - do i_1s = 1, List_all_comb_b3_size + ! --- - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) + call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit) + tmp_x += coef_fit * int_fit(1) + tmp_y += coef_fit * int_fit(2) + tmp_z += coef_fit * int_fit(3) + if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) * coef + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) - alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s = beta + expo_fit + alpha_1s_inv = 1.d0 / alpha_1s - do ipoint = 1, n_points_final_grid - r(1) = final_grid_points_transp(ipoint,1) - r(2) = final_grid_points_transp(ipoint,2) - r(3) = final_grid_points_transp(ipoint,3) + centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) + centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) + centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - centr_1s(ipoint,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) - centr_1s(ipoint,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) - centr_1s(ipoint,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) - enddo + expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist + coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) + if(dabs(coef_tmp) .lt. 1d-10) cycle + + call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit) - expo_coef_1s = beta * expo_fit * alpha_1s_inv - !$OMP BARRIER - !$OMP DO SCHEDULE(dynamic) - do i = 1, ao_num - do j = i, ao_num - call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s,& - 1.d+9, final_grid_points_transp, int_fit_v, n_points_final_grid) - - do ipoint = 1, n_points_final_grid - coef_tmp = coef_fit * dexp(-expo_coef_1s* dist(ipoint)) - int2_u_grad1u_x_j1b2(1,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(1,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,1) - int2_u_grad1u_x_j1b2(2,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(2,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,2) - int2_u_grad1u_x_j1b2(3,j,i,ipoint) = & - int2_u_grad1u_x_j1b2(3,j,i,ipoint) + coef_tmp * int_fit_v(ipoint,3) + tmp_x += coef_tmp * int_fit(1) + tmp_y += coef_tmp * int_fit(2) + tmp_z += coef_tmp * int_fit(3) enddo - enddo - enddo - !$OMP END DO NOWAIT + ! --- + + enddo + + int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x + int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y + int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z + enddo enddo enddo - deallocate(int_fit_v) - !$OMP END PARALLEL - - deallocate(dist) + !$OMP END DO + !$OMP END PARALLEL do ipoint = 1, n_points_final_grid do i = 2, ao_num @@ -278,7 +300,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p call wall_time(wall1) print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! 
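! Note (sketch, concerning the mu_in = 1.d+9 calls in these providers): since
! $\lim_{\mu \to \infty} \frac{\mathrm{erf}(\mu r)}{r} = \frac{1}{r}$,
! passing a very large screening parameter (1.d+9) to NAI_pol_mult_erf_ao_with1s or
! NAI_pol_x_mult_erf_ao_with1s turns the long-range kernel into the plain Coulomb
! kernel, which is how the int_coulomb-type contributions are obtained without a
! separate unattenuated integral routine.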
--- @@ -306,11 +328,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points !$OMP PARALLEL DEFAULT (NONE) & !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist, & - !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & - !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp) & + !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & - !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & + !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & !$OMP List_all_comb_b3_cent, int2_u_grad1u_j1b2) !$OMP DO do ipoint = 1, n_points_final_grid @@ -321,37 +343,48 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points r(3) = final_grid_points(3,ipoint) tmp = 0.d0 - do i_1s = 1, List_all_comb_b3_size + do i_fit = 1, ng_fit_jast - coef = List_all_comb_b3_coef (i_1s) - beta = List_all_comb_b3_expo (i_1s) - B_center(1) = List_all_comb_b3_cent(1,i_1s) - B_center(2) = List_all_comb_b3_cent(2,i_1s) - B_center(3) = List_all_comb_b3_cent(3,i_1s) - dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & - + (B_center(2) - r(2)) * (B_center(2) - r(2)) & - + (B_center(3) - r(3)) * (B_center(3) - r(3)) + expo_fit = expo_gauss_j_mu_1_erf(i_fit) + coef_fit = coef_gauss_j_mu_1_erf(i_fit) - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_1_erf(i_fit) - coef_fit = coef_gauss_j_mu_1_erf(i_fit) + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r) + if(dabs(int_fit) .lt. 1d-10) cycle + + tmp += coef_fit * int_fit + + ! --- + + do i_1s = 2, List_all_comb_b3_size + + coef = List_all_comb_b3_coef (i_1s) + beta = List_all_comb_b3_expo (i_1s) + B_center(1) = List_all_comb_b3_cent(1,i_1s) + B_center(2) = List_all_comb_b3_cent(2,i_1s) + B_center(3) = List_all_comb_b3_cent(3,i_1s) + dist = (B_center(1) - r(1)) * (B_center(1) - r(1)) & + + (B_center(2) - r(2)) * (B_center(2) - r(2)) & + + (B_center(3) - r(3)) * (B_center(3) - r(3)) alpha_1s = beta + expo_fit - alpha_1s_inv = 1.d0 / alpha_1s + alpha_1s_inv = 1.d0 / alpha_1s centr_1s(1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1)) centr_1s(2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2)) centr_1s(3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3)) expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist - !if(expo_coef_1s .gt. 80.d0) cycle coef_tmp = coef * coef_fit * dexp(-expo_coef_1s) - !if(dabs(coef_tmp) .lt. 1d-10) cycle - + if(dabs(coef_tmp) .lt. 1d-10) cycle + int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r) tmp += coef_tmp * int_fit enddo + + ! --- + enddo int2_u_grad1u_j1b2(j,i,ipoint) = tmp @@ -372,7 +405,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points call wall_time(wall1) print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0 -END_PROVIDER +END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f new file mode 100644 index 00000000..6d3931f5 --- /dev/null +++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f @@ -0,0 +1,453 @@ +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! 
-\frac{1}{4} int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2 +! ! +! END_DOC +! +! implicit none +! integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid +! double precision :: r(3), expo_fit, coef_fit +! double precision :: coef, beta, B_center(3) +! double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:) +! double precision, allocatable :: r_mask_grid(:,:) +! double precision, allocatable :: int_fit_v(:) +! +! print*, ' providing int2_grad1u2_grad2u2_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_grad1u2_grad2u2_j1b2(:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,& +! !$OMP coef_fit, expo_fit, int_fit_v, n_mask_grid, & +! !$OMP i_mask_grid, r_mask_grid) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size,& +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2, & +! !$OMP ao_overlap_abs) +! +! allocate(int_fit_v(n_points_final_grid)) +! allocate(n_mask_grid(n_points_final_grid)) +! allocate(r_mask_grid(n_points_final_grid,3)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! +! if(ao_overlap_abs(j,i) .lt. 1.d-12) then +! cycle +! endif +! +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_1_erf_x_2(i_fit) +! coef_fit = coef_gauss_1_erf_x_2(i_fit) * (-0.25d0) +! +! ! --- +! +! call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! do ipoint = 1, n_points_final_grid +! +! int2_grad1u2_grad2u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) +! +! if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then +! i_mask_grid += 1 +! n_mask_grid(i_mask_grid ) = ipoint +! r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) +! endif +! +! enddo +! +! if(i_mask_grid .eq. 0) cycle +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid +! int2_grad1u2_grad2u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0 +! +!END_PROVIDER +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2 +! ! +! END_DOC +! +! implicit none +! 
integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid +! double precision :: r(3), expo_fit, coef_fit +! double precision :: coef, beta, B_center(3), tmp +! double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:) +! double precision, allocatable :: r_mask_grid(:,:) +! double precision, allocatable :: int_fit_v(:) +! +! print*, ' providing int2_u2_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_u2_j1b2(:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & +! !$OMP coef_fit, expo_fit, int_fit_v, & +! !$OMP i_mask_grid, n_mask_grid, r_mask_grid ) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_u2_j1b2) +! +! allocate(n_mask_grid(n_points_final_grid)) +! allocate(r_mask_grid(n_points_final_grid,3)) +! allocate(int_fit_v(n_points_final_grid)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_j_mu_x_2(i_fit) +! coef_fit = coef_gauss_j_mu_x_2(i_fit) +! +! ! --- +! +! call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! +! do ipoint = 1, n_points_final_grid +! int2_u2_j1b2(j,i,ipoint) += coef_fit * int_fit_v(ipoint) +! +! if(dabs(int_fit_v(ipoint)) .gt. 1d-10) then +! i_mask_grid += 1 +! n_mask_grid(i_mask_grid ) = ipoint +! r_mask_grid(i_mask_grid,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid,3) = final_grid_points_transp(ipoint,3) +! endif +! enddo +! +! if(i_mask_grid .eq. 0) cycle +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! call overlap_gauss_r12_ao_with1s_v(B_center, beta, r_mask_grid, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid +! int2_u2_j1b2(j,i,n_mask_grid(ipoint)) += coef * int_fit_v(ipoint) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_u2_j1b2', wall1 - wall0 +! +!END_PROVIDER +! +!! --- +! +!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)] +! +! BEGIN_DOC +! ! +! ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2 +! ! +! END_DOC +! +! implicit none +! +! integer :: i, j, ipoint, i_1s, i_fit +! integer :: i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid(3) +! double precision :: x, y, z, expo_fit, coef_fit +! double precision :: coef, beta, B_center(3) +! double precision :: alpha_1s, alpha_1s_inv, expo_coef_1s +! 
double precision :: wall0, wall1 +! +! integer, allocatable :: n_mask_grid(:,:) +! double precision, allocatable :: r_mask_grid(:,:,:) +! double precision, allocatable :: int_fit_v(:,:), dist(:,:), centr_1s(:,:,:) +! +! print*, ' providing int2_u_grad1u_x_j1b2' +! +! provide mu_erf final_grid_points_transp j1b_pen +! call wall_time(wall0) +! +! int2_u_grad1u_x_j1b2(:,:,:,:) = 0.d0 +! +! !$OMP PARALLEL DEFAULT (NONE) & +! !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, x, y, z, coef, beta, & +! !$OMP coef_fit, expo_fit, int_fit_v, alpha_1s, dist, B_center,& +! !$OMP alpha_1s_inv, centr_1s, expo_coef_1s, & +! !$OMP i_mask_grid1, i_mask_grid2, i_mask_grid3, i_mask_grid, & +! !$OMP n_mask_grid, r_mask_grid) & +! !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b3_size, & +! !$OMP final_grid_points_transp, n_max_fit_slat, & +! !$OMP expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf, & +! !$OMP List_all_comb_b3_coef, List_all_comb_b3_expo, & +! !$OMP List_all_comb_b3_cent, int2_u_grad1u_x_j1b2) +! +! allocate(dist(n_points_final_grid,3)) +! allocate(centr_1s(n_points_final_grid,3,3)) +! allocate(n_mask_grid(n_points_final_grid,3)) +! allocate(r_mask_grid(n_points_final_grid,3,3)) +! allocate(int_fit_v(n_points_final_grid,3)) +! +! !$OMP DO SCHEDULE(dynamic) +! do i = 1, ao_num +! do j = i, ao_num +! do i_fit = 1, n_max_fit_slat +! +! expo_fit = expo_gauss_j_mu_1_erf(i_fit) +! coef_fit = coef_gauss_j_mu_1_erf(i_fit) +! +! ! --- +! +! call NAI_pol_x_mult_erf_ao_with1s_v0(i, j, expo_fit, final_grid_points_transp, n_points_final_grid, 1.d+9, final_grid_points_transp, n_points_final_grid, int_fit_v, n_points_final_grid, n_points_final_grid) +! +! i_mask_grid1 = 0 ! dim +! i_mask_grid2 = 0 ! dim +! i_mask_grid3 = 0 ! dim +! n_mask_grid = 0 ! ind +! r_mask_grid = 0.d0 ! val +! do ipoint = 1, n_points_final_grid +! +! ! --- +! +! int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1) +! +! if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then +! i_mask_grid1 += 1 +! n_mask_grid(i_mask_grid1, 1) = ipoint +! r_mask_grid(i_mask_grid1,1,1) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid1,2,1) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid1,3,1) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2) +! +! if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then +! i_mask_grid2 += 1 +! n_mask_grid(i_mask_grid2, 2) = ipoint +! r_mask_grid(i_mask_grid2,1,2) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid2,2,2) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid2,3,2) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3) +! +! if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then +! i_mask_grid3 += 1 +! n_mask_grid(i_mask_grid3, 3) = ipoint +! r_mask_grid(i_mask_grid3,1,3) = final_grid_points_transp(ipoint,1) +! r_mask_grid(i_mask_grid3,2,3) = final_grid_points_transp(ipoint,2) +! r_mask_grid(i_mask_grid3,3,3) = final_grid_points_transp(ipoint,3) +! endif +! +! ! --- +! +! enddo +! +! if((i_mask_grid1+i_mask_grid2+i_mask_grid3) .eq. 0) cycle +! +! i_mask_grid(1) = i_mask_grid1 +! i_mask_grid(2) = i_mask_grid2 +! i_mask_grid(3) = i_mask_grid3 +! +! ! --- +! +! do i_1s = 2, List_all_comb_b3_size +! +! coef = List_all_comb_b3_coef (i_1s) * coef_fit +! beta = List_all_comb_b3_expo (i_1s) +! B_center(1) = List_all_comb_b3_cent(1,i_1s) +! B_center(2) = List_all_comb_b3_cent(2,i_1s) +! 
B_center(3) = List_all_comb_b3_cent(3,i_1s) +! +! alpha_1s = beta + expo_fit +! alpha_1s_inv = 1.d0 / alpha_1s +! expo_coef_1s = beta * expo_fit * alpha_1s_inv +! +! do ipoint = 1, i_mask_grid1 +! +! x = r_mask_grid(ipoint,1,1) +! y = r_mask_grid(ipoint,2,1) +! z = r_mask_grid(ipoint,3,1) +! +! centr_1s(ipoint,1,1) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,1) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,1) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,1) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! do ipoint = 1, i_mask_grid2 +! +! x = r_mask_grid(ipoint,1,2) +! y = r_mask_grid(ipoint,2,2) +! z = r_mask_grid(ipoint,3,2) +! +! centr_1s(ipoint,1,2) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,2) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,2) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,2) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! do ipoint = 1, i_mask_grid3 +! +! x = r_mask_grid(ipoint,1,3) +! y = r_mask_grid(ipoint,2,3) +! z = r_mask_grid(ipoint,3,3) +! +! centr_1s(ipoint,1,3) = alpha_1s_inv * (beta * B_center(1) + expo_fit * x) +! centr_1s(ipoint,2,3) = alpha_1s_inv * (beta * B_center(2) + expo_fit * y) +! centr_1s(ipoint,3,3) = alpha_1s_inv * (beta * B_center(3) + expo_fit * z) +! +! dist(ipoint,3) = (B_center(1) - x) * (B_center(1) - x) + (B_center(2) - y) * (B_center(2) - y) + (B_center(3) - z) * (B_center(3) - z) +! enddo +! +! call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid) +! +! do ipoint = 1, i_mask_grid1 +! int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1) +! enddo +! +! do ipoint = 1, i_mask_grid2 +! int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2) +! enddo +! +! do ipoint = 1, i_mask_grid3 +! int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3) +! enddo +! +! enddo +! +! ! --- +! +! enddo +! enddo +! enddo +! !$OMP END DO +! +! deallocate(dist) +! deallocate(centr_1s) +! deallocate(n_mask_grid) +! deallocate(r_mask_grid) +! deallocate(int_fit_v) +! +! !$OMP END PARALLEL +! +! do ipoint = 1, n_points_final_grid +! do i = 2, ao_num +! do j = 1, i-1 +! int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint) +! int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint) +! int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint) +! enddo +! enddo +! enddo +! +! call wall_time(wall1) +! print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0 +! +!END_PROVIDER +! diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f index 552e7069..6a662533 100644 --- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f +++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f @@ -38,7 +38,24 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po do j = i, ao_num tmp = 0.d0 - do i_1s = 1, List_all_comb_b2_size + + ! 
--- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + int_mu = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r) + int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r) + if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle + + tmp += coef * (int_mu - int_coulomb) + + ! --- + + do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) beta = List_all_comb_b2_expo (i_1s) @@ -52,6 +69,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po tmp += coef * (int_mu - int_coulomb) enddo + ! --- + v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = tmp enddo enddo @@ -138,7 +157,27 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ tmp_x = 0.d0 tmp_y = 0.d0 tmp_z = 0.d0 - do i_1s = 1, List_all_comb_b2_size + + ! --- + + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints ) + call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, 1.d+9, r, ints_coulomb) + + if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle + + tmp_x += coef * (ints(1) - ints_coulomb(1)) + tmp_y += coef * (ints(2) - ints_coulomb(2)) + tmp_z += coef * (ints(3) - ints_coulomb(3)) + + ! --- + + do i_1s = 2, List_all_comb_b2_size coef = List_all_comb_b2_coef (i_1s) beta = List_all_comb_b2_expo (i_1s) @@ -154,6 +193,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_ tmp_z += coef * (ints(3) - ints_coulomb(3)) enddo + ! --- + x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z @@ -207,7 +248,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, & !$OMP coef_fit, expo_fit, int_fit, tmp) & !$OMP SHARED (n_points_final_grid, ao_num, List_all_comb_b2_size, & - !$OMP final_grid_points, n_max_fit_slat, & + !$OMP final_grid_points, ng_fit_jast, & !$OMP expo_gauss_j_mu_x, coef_gauss_j_mu_x, & !$OMP List_all_comb_b2_coef, List_all_comb_b2_expo, & !$OMP List_all_comb_b2_cent, v_ij_u_cst_mu_j1b) @@ -222,22 +263,41 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_ do j = i, ao_num tmp = 0.d0 - do i_1s = 1, List_all_comb_b2_size + do i_fit = 1, ng_fit_jast - coef = List_all_comb_b2_coef (i_1s) - beta = List_all_comb_b2_expo (i_1s) - B_center(1) = List_all_comb_b2_cent(1,i_1s) - B_center(2) = List_all_comb_b2_cent(2,i_1s) - B_center(3) = List_all_comb_b2_cent(3,i_1s) + expo_fit = expo_gauss_j_mu_x(i_fit) + coef_fit = coef_gauss_j_mu_x(i_fit) - do i_fit = 1, n_max_fit_slat + ! --- - expo_fit = expo_gauss_j_mu_x(i_fit) - coef_fit = coef_gauss_j_mu_x(i_fit) - int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + coef = List_all_comb_b2_coef (1) + beta = List_all_comb_b2_expo (1) + B_center(1) = List_all_comb_b2_cent(1,1) + B_center(2) = List_all_comb_b2_cent(2,1) + B_center(3) = List_all_comb_b2_cent(3,1) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) + if(dabs(int_fit) .lt. 
1d-10) cycle + + tmp += coef * coef_fit * int_fit + + ! --- + + do i_1s = 2, List_all_comb_b2_size + + coef = List_all_comb_b2_coef (i_1s) + beta = List_all_comb_b2_expo (i_1s) + B_center(1) = List_all_comb_b2_cent(1,i_1s) + B_center(2) = List_all_comb_b2_cent(2,i_1s) + B_center(3) = List_all_comb_b2_cent(3,i_1s) + + int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j) tmp += coef * coef_fit * int_fit enddo + + ! --- + enddo v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f index 1178cc31..0b40170c 100644 --- a/src/ao_many_one_e_ints/listj1b.irp.f +++ b/src/ao_many_one_e_ints/listj1b.irp.f @@ -168,7 +168,7 @@ END_PROVIDER do j = 1, nucl_num tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j) - print*,List_all_comb_b3(j,i),j1b_pen(j) + !print*, List_all_comb_b3(j,i), j1b_pen(j) List_all_comb_b3_expo(i) += tmp_alphaj List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1) List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2) @@ -220,6 +220,10 @@ END_PROVIDER List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i)) enddo + print *, ' 1st coeff & expo of lists' + print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1) + print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1) + END_PROVIDER ! --- diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f index 1638aa9e..cfdaf95f 100644 --- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f +++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f @@ -56,79 +56,83 @@ end !--- -subroutine overlap_gauss_r12_v(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,rvec,n_points) +! TODO apply Gaussian product three times first +subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_A, power_B, alpha, beta, rvec, LD_rvec, n_points) + BEGIN_DOC + ! ! Computes the following integral : ! - ! .. math :: - ! - ! \int dr exp(-delta (r - D)^2 ) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 ) + ! \int dr exp(-delta (r - D)^2) (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2) ! using an array of D_centers ! + ! n_points: nb of integrals + ! END_DOC implicit none + include 'constants.include.F' - integer, intent(in) :: n_points - double precision, intent(in) :: D_center(n_points,3), delta ! pure gaussian "D" - double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! 
gaussian/polynoms "A" and "B" - integer, intent(in) :: power_A(3),power_B(3) - double precision, intent(out) :: rvec(n_points) - double precision, allocatable :: overlap(:) - double precision :: overlap_x, overlap_y, overlap_z + integer, intent(in) :: LD_D, LD_rvec, n_points + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: D_center(LD_D,3), delta + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta + double precision, intent(out) :: rvec(LD_rvec) - integer :: maxab - integer, allocatable :: iorder_a_new(:) - double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) - double precision, allocatable :: fact_a_new(:) - double precision :: alpha_new - double precision :: accu,thr, coefxy - integer :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1, ipoint + integer :: maxab + integer :: d(3), i, lx, ly, lz, iorder_tmp(3), ipoint + double precision :: overlap_x, overlap_y, overlap_z + double precision :: alpha_new + double precision :: accu, thr, coefxy + integer, allocatable :: iorder_a_new(:) + double precision, allocatable :: overlap(:) + double precision, allocatable :: A_new(:,:,:), A_center_new(:,:) + double precision, allocatable :: fact_a_new(:) - dim1=100 - thr = 1.d-10 + thr = 1.d-10 d(:) = 0 maxab = maxval(power_A(1:3)) - allocate (A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), & - fact_a_new(n_points), iorder_a_new(3), overlap(n_points) ) + allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points)) - call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & - alpha_new, fact_a_new, iorder_a_new , delta, alpha, d, power_A, & - D_center, A_center, n_points) + call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, & + alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, & + D_center, LD_D, A_center, n_points) - do ipoint=1,n_points - rvec(ipoint) = 0.d0 - enddo + rvec(:) = 0.d0 do lx = 0, iorder_a_new(1) iorder_tmp(1) = lx + do ly = 0, iorder_a_new(2) iorder_tmp(2) = ly + do lz = 0, iorder_a_new(3) iorder_tmp(3) = lz - call overlap_gaussian_xyz_v(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap,dim1,n_points) - do ipoint=1,n_points - rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * & - A_new(ipoint,ly,2) * & - A_new(ipoint,lz,3) * overlap(ipoint) + + call overlap_gaussian_xyz_v(A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B, overlap, n_points) + + do ipoint = 1, n_points + rvec(ipoint) = rvec(ipoint) + A_new(ipoint,lx,1) * A_new(ipoint,ly,2) * A_new(ipoint,lz,3) * overlap(ipoint) enddo enddo enddo enddo - do ipoint=1,n_points + do ipoint = 1, n_points rvec(ipoint) = rvec(ipoint) * fact_a_new(ipoint) enddo + deallocate(A_new, A_center_new, fact_a_new, iorder_a_new, overlap) -end -!--- +end subroutine overlap_gauss_r12_v + !--- -subroutine overlap_gauss_xyz_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta,gauss_ints) +subroutine overlap_gauss_xyz_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta, gauss_ints) + BEGIN_DOC ! Computes the following integral : ! diff --git a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f index 96625df5..dddf98d4 100644 --- a/src/ao_one_e_ints/pot_ao_erf_ints.irp.f +++ b/src/ao_one_e_ints/pot_ao_erf_ints.irp.f @@ -180,8 +180,7 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, enddo ! 
call give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf = 0.d0 @@ -198,7 +197,8 @@ double precision function NAI_pol_mult_erf(A_center, B_center, power_A, power_B, end function NAI_pol_mult_erf ! --- -subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in, mu_in, res_v, n_points) + +subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! @@ -214,74 +214,90 @@ subroutine NAI_pol_mult_erf_v(A_center, B_center, power_A, power_B, alpha, beta, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A(3), power_B(3) - double precision, intent(in) :: C_center(n_points,3), A_center(3), B_center(3), alpha, beta, mu_in - double precision, intent(out) :: res_v(n_points) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: P_center(3) - double precision :: d(0:n_pt_in), coeff, dist, const, factor - double precision :: const_factor, dist_integral - double precision :: accu, p_inv, p, rho, p_inv_2 - double precision :: p_new + integer, intent(in) :: n_pt_in, n_points, LD_C, LD_resv + integer, intent(in) :: power_A(3), power_B(3) + double precision, intent(in) :: A_center(3), B_center(3), alpha, beta, mu_in + double precision, intent(in) :: C_center(LD_C,3) + double precision, intent(out) :: res_v(LD_resv) - double precision :: rint + integer :: i, n_pt, n_pt_out, ipoint + double precision :: P_center(3) + double precision :: d(0:n_pt_in), coeff, dist, const, factor + double precision :: const_factor, dist_integral + double precision :: accu, p_inv, p, rho, p_inv_2 + double precision :: p_new, p_new2, coef_tmp - p = alpha + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) + double precision :: rint - dist = 0.d0 + res_V(1:LD_resv) = 0.d0 + + p = alpha + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = p * p_new2 + + dist = 0.d0 do i = 1, 3 - P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv - dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) + P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv + dist += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i)) enddo - do ipoint=1,n_points - dist_integral = 0.d0 - do i = 1, 3 - dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) - enddo - const_factor = dist * rho - if(const_factor > 80.d0) then - res_V(ipoint) = 0.d0 - cycle - endif + const_factor = dist * rho + if(const_factor > 80.d0) then + return + endif + factor = dexp(-const_factor) + coeff = dtwo_pi * factor * p_inv * p_new - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new + n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) + + if(n_pt == 0) then + + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - 
C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral - n_pt = 2 * ( power_A(1) + power_B(1) + power_A(2) + power_B(2) + power_A(3) + power_B(3) ) - const = p * dist_integral * p_new * p_new - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - p_new = p_new * p_new - call give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif + else - ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + dist_integral = 0.d0 + do i = 1, 3 + dist_integral += (P_center(i) - C_center(ipoint,i)) * (P_center(i) - C_center(ipoint,i)) + enddo + const = coef_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + call give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + res_v(ipoint) = 0.d0 + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - enddo -end + endif + +end subroutine NAI_pol_mult_erf_v ! --- @@ -380,9 +396,7 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A d(i) = 0.d0 enddo p_new = p_new * p_new - - call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) + call give_polynomial_mult_center_one_e_erf_opt( A1_center, A2_center, power_A1, power_A2, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) if(n_pt_out < 0) then NAI_pol_mult_erf_with1s = 0.d0 @@ -398,10 +412,9 @@ double precision function NAI_pol_mult_erf_with1s( A1_center, A2_center, power_A end function NAI_pol_mult_erf_with1s -!-- +! --- -subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, alpha1, alpha2& - , beta, B_center, C_center, n_pt_in, mu_in, res_v, n_points) +subroutine NAI_pol_mult_erf_with1s_v(A1_center, A2_center, power_A1, power_A2, alpha1, alpha2, beta, B_center, LD_B, C_center, LD_C, n_pt_in, mu_in, res_v, LD_resv, n_points) BEGIN_DOC ! 
@@ -420,23 +433,26 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, include 'utils/constants.include.F' implicit none - integer, intent(in) :: n_pt_in, n_points - integer, intent(in) :: power_A1(3), power_A2(3) - double precision, intent(in) :: C_center(n_points,3), A1_center(3), A2_center(3), B_center(n_points,3) - double precision, intent(in) :: alpha1, alpha2, beta, mu_in - double precision, intent(out) :: res_v(n_points) + integer, intent(in) :: n_pt_in, LD_B, LD_C, LD_resv, n_points + integer, intent(in) :: power_A1(3), power_A2(3) + double precision, intent(in) :: A1_center(3), A2_center(3) + double precision, intent(in) :: C_center(LD_C,3), B_center(LD_B,3) + double precision, intent(in) :: alpha1, alpha2, beta, mu_in + double precision, intent(out) :: res_v(LD_resv) - integer :: i, n_pt, n_pt_out, ipoint - double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 - double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor - double precision :: dist_integral - double precision :: d(0:n_pt_in), coeff, const, factor - double precision :: accu - double precision :: p_new, p_new2 + integer :: i, n_pt, n_pt_out, ipoint + double precision :: alpha12, alpha12_inv, alpha12_inv_2, rho12, A12_center(3), dist12, const_factor12 + double precision :: p, p_inv, p_inv_2, rho, P_center(3), dist, const_factor + double precision :: dist_integral + double precision :: d(0:n_pt_in), coeff, const, factor + double precision :: accu + double precision :: p_new, p_new2, coef_tmp, cons_tmp - double precision :: rint + double precision :: rint + res_V(1:LD_resv) = 0.d0 + ! e^{-alpha1 (r - A1)^2} e^{-alpha2 (r - A2)^2} = e^{-K12} e^{-alpha12 (r - A12)^2} alpha12 = alpha1 + alpha2 alpha12_inv = 1.d0 / alpha12 @@ -446,87 +462,92 @@ subroutine NAI_pol_mult_erf_with1s_v( A1_center, A2_center, power_A1, power_A2, A12_center(2) = (alpha1 * A1_center(2) + alpha2 * A2_center(2)) * alpha12_inv A12_center(3) = (alpha1 * A1_center(3) + alpha2 * A2_center(3)) * alpha12_inv dist12 = (A1_center(1) - A2_center(1)) * (A1_center(1) - A2_center(1))& - + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& - + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) + + (A1_center(2) - A2_center(2)) * (A1_center(2) - A2_center(2))& + + (A1_center(3) - A2_center(3)) * (A1_center(3) - A2_center(3)) const_factor12 = dist12 * rho12 - if(const_factor12 > 80.d0) then - res_v(:) = 0.d0 return endif - ! --- - ! 
e^{-K12} e^{-alpha12 (r - A12)^2} e^{-beta (r - B)^2} = e^{-K} e^{-p (r - P)^2} - p = alpha12 + beta - p_inv = 1.d0 / p - p_inv_2 = 0.5d0 * p_inv - rho = alpha12 * beta * p_inv - p_new = mu_in / dsqrt(p + mu_in * mu_in) - p_new2 = p_new * p_new - n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) & - + power_A1(3) + power_A2(3) ) + p = alpha12 + beta + p_inv = 1.d0 / p + p_inv_2 = 0.5d0 * p_inv + rho = alpha12 * beta * p_inv + p_new = mu_in / dsqrt(p + mu_in * mu_in) + p_new2 = p_new * p_new + coef_tmp = dtwo_pi * p_inv * p_new + cons_tmp = p * p_new2 + n_pt = 2 * (power_A1(1) + power_A2(1) + power_A1(2) + power_A2(2) + power_A1(3) + power_A2(3) ) - do ipoint=1,n_points + if(n_pt == 0) then - P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv - P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv - P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv - dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& - + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& - + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + do ipoint = 1, n_points - const_factor = const_factor12 + dist * rho - if(const_factor > 80.d0) then - res_v(ipoint) = 0.d0 - cycle - endif + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) - dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& - + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& - + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral - ! --- - - factor = dexp(-const_factor) - coeff = dtwo_pi * factor * p_inv * p_new - - const = p * dist_integral * p_new2 - if(n_pt == 0) then res_v(ipoint) = coeff * rint(0, const) - cycle - endif - - do i = 0, n_pt_in - d(i) = 0.d0 enddo - !TODO: VECTORIZE HERE - call give_polynomial_mult_center_one_e_erf_opt( & - A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3)& - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center,1) + else - if(n_pt_out < 0) then - res_v(ipoint) = 0.d0 - cycle - endif - - ! 
sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i - accu = 0.d0 - do i = 0, n_pt_out, 2 - accu += d(i) * rint(i/2, const) + do ipoint = 1, n_points + + dist = (A12_center(1) - B_center(ipoint,1)) * (A12_center(1) - B_center(ipoint,1))& + + (A12_center(2) - B_center(ipoint,2)) * (A12_center(2) - B_center(ipoint,2))& + + (A12_center(3) - B_center(ipoint,3)) * (A12_center(3) - B_center(ipoint,3)) + const_factor = const_factor12 + dist * rho + if(const_factor > 80.d0) cycle + coeff = coef_tmp * dexp(-const_factor) + + P_center(1) = (alpha12 * A12_center(1) + beta * B_center(ipoint,1)) * p_inv + P_center(2) = (alpha12 * A12_center(2) + beta * B_center(ipoint,2)) * p_inv + P_center(3) = (alpha12 * A12_center(3) + beta * B_center(ipoint,3)) * p_inv + dist_integral = (P_center(1) - C_center(ipoint,1)) * (P_center(1) - C_center(ipoint,1))& + + (P_center(2) - C_center(ipoint,2)) * (P_center(2) - C_center(ipoint,2))& + + (P_center(3) - C_center(ipoint,3)) * (P_center(3) - C_center(ipoint,3)) + const = cons_tmp * dist_integral + + do i = 0, n_pt_in + d(i) = 0.d0 + enddo + !TODO: VECTORIZE HERE + call give_polynomial_mult_center_one_e_erf_opt(A1_center, A2_center, power_A1, power_A2, C_center(ipoint,1:3), n_pt_in, d, n_pt_out, p_inv_2, p_new2, P_center) + + if(n_pt_out < 0) then + cycle + endif + + ! sum of integrals of type : int {t,[0,1]} exp-(rho.(P-Q)^2 * t^2) * t^i + accu = 0.d0 + do i = 0, n_pt_out, 2 + accu += d(i) * rint(i/2, const) + enddo + + res_v(ipoint) = accu * coeff enddo - res_v(ipoint) = accu * coeff - end do -end + endif + +end subroutine NAI_pol_mult_erf_with1s_v -! --- ! --- -subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_A, power_B, C_center & - , n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) +subroutine give_polynomial_mult_center_one_e_erf_opt(A_center, B_center, power_A, power_B, C_center, n_pt_in, d, n_pt_out, p_inv_2, p_new, P_center) BEGIN_DOC ! Returns the explicit polynomial in terms of the $t$ variable of the @@ -554,17 +575,17 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ accu = 0.d0 ASSERT (n_pt_in > 1) - R1x(0) = (P_center(1) - A_center(1)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(1) - C_center(1))* p_new + R1x(0) = (P_center(1) - A_center(1)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(1) - C_center(1))* p_new ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(1) - B_center(1)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(1) - C_center(1))* p_new + R1xp(0) = (P_center(1) - B_center(1)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(1) - C_center(1))* p_new !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R2x(0) = p_inv_2 - R2x(1) = 0.d0 - R2x(2) = -p_inv_2 * p_new + R2x(0) = p_inv_2 + R2x(1) = 0.d0 + R2x(2) = -p_inv_2 * p_new !R2x = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2 do i = 0, n_pt_in @@ -588,13 +609,13 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ return endif - R1x(0) = (P_center(2) - A_center(2)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(2) - C_center(2))* p_new + R1x(0) = (P_center(2) - A_center(2)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(2) - C_center(2))* p_new ! 
R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(2) - B_center(2)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(2) - C_center(2))* p_new + R1xp(0) = (P_center(2) - B_center(2)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(2) - C_center(2))* p_new !R1xp = (P_x - B_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 a_y = power_A(2) b_y = power_B(2) @@ -607,13 +628,13 @@ subroutine give_polynomial_mult_center_one_e_erf_opt( A_center, B_center, power_ return endif - R1x(0) = (P_center(3) - A_center(3)) - R1x(1) = 0.d0 - R1x(2) = -(P_center(3) - C_center(3)) * p_new + R1x(0) = (P_center(3) - A_center(3)) + R1x(1) = 0.d0 + R1x(2) = -(P_center(3) - C_center(3)) * p_new ! R1x = (P_x - A_x) - (P_x - C_x) ( t * mu/sqrt(p+mu^2) )^2 - R1xp(0) = (P_center(3) - B_center(3)) - R1xp(1) = 0.d0 - R1xp(2) =-(P_center(3) - C_center(3)) * p_new + R1xp(0) = (P_center(3) - B_center(3)) + R1xp(1) = 0.d0 + R1xp(2) =-(P_center(3) - C_center(3)) * p_new !R2x = 0.5 / p - 0.5/p ( t * mu/sqrt(p+mu^2) )^2 a_z = power_A(3) b_z = power_B(3) @@ -642,16 +663,15 @@ end subroutine give_polynomial_mult_center_one_e_erf_opt ! --- +subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) - -subroutine give_polynomial_mult_center_one_e_erf(A_center,B_center,alpha,beta,& - power_A,power_B,C_center,n_pt_in,d,n_pt_out,mu_in) BEGIN_DOC ! Returns the explicit polynomial in terms of the $t$ variable of the ! following polynomial: ! ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$. END_DOC + implicit none integer, intent(in) :: n_pt_in integer,intent(out) :: n_pt_out diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f index d53209d0..8fad9079 100644 --- a/src/ao_tc_eff_map/fit_j.irp.f +++ b/src/ao_tc_eff_map/fit_j.irp.f @@ -1,3 +1,5 @@ +! --- + BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ] implicit none BEGIN_DOC @@ -14,8 +16,8 @@ END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x, (ng_fit_jast)] BEGIN_DOC ! @@ -34,31 +36,96 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta - tmp = -0.5d0 / (mu_erf * sqrt(dacos(-1.d0))) + if(ng_fit_jast .eq. 1) then - alpha = expo_j_xmu(1) * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = expo_j_xmu(2) * mu_erf * mu_erf + coef_gauss_j_mu_x = (/ -0.47947881d0 /) + expo_gauss_j_mu_x = (/ 3.4987848d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_x = (/ -0.18390742d0, -0.35512656d0 /) + expo_gauss_j_mu_x = (/ 31.9279947d0 , 2.11428789d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_x = (/ -0.07501725d0, -0.28499012d0, -0.1953932d0 /) + expo_gauss_j_mu_x = (/ 206.74058566d0, 1.72974157d0, 11.18735164d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 
5) then + + coef_gauss_j_mu_x = (/ -0.01832955d0 , -0.10188952d0 , -0.20710858d0 , -0.18975032d0 , -0.04641657d0 /) + expo_gauss_j_mu_x = (/ 4.33116687d+03, 2.61292842d+01, 1.43447161d+00, 4.92767426d+00, 2.10654699d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_x = (/ -0.08783664d0 , -0.16088711d0 , -0.18464486d0 , -0.0368509d0 , -0.08130028d0 , -0.0126972d0 /) + expo_gauss_j_mu_x = (/ 4.09729729d+01, 7.11620618d+00, 2.03692338d+00, 4.10831731d+02, 1.12480198d+00, 1.00000000d+04 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + alpha = expo_j_xmu(1) * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = expo_j_xmu(2) * mu_erf * mu_erf + + tmp = -1.0d0 / sqrt(dacos(-1.d0)) + do i = 1, ng_fit_jast + expo_gauss_j_mu_x(i) = expos(i) + beta + coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop - do i = 1, n_max_fit_slat - expo_gauss_j_mu_x(i) = expos(i) + beta - coef_gauss_j_mu_x(i) = tmp * coef_fit_slat_gauss(i) + endif + + tmp = 0.5d0 / mu_erf + do i = 1, ng_fit_jast + coef_gauss_j_mu_x(i) = tmp * coef_gauss_j_mu_x(i) enddo END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_x_2, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_x_2, (ng_fit_jast)] BEGIN_DOC ! ! J(mu,r12)^2 = 0.25/mu^2 F(r12*mu)^2 ! - ! F(x)^2 = 1 /pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) + ! F(x)^2 = 1/pi * exp(-2 * alpha * x) exp(-2 * beta * x^2) ! ! The slater function exp(-2 * alpha * x) is fitted with n_max_fit_slat gaussians ! @@ -69,33 +136,98 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta double precision :: alpha_opt, beta_opt - !alpha_opt = 2.d0 * expo_j_xmu(1) - !beta_opt = 2.d0 * expo_j_xmu(2) - - ! direct opt - alpha_opt = 3.52751759d0 - beta_opt = 1.26214809d0 + if(ng_fit_jast .eq. 1) then - tmp = 0.25d0 / (mu_erf * mu_erf * dacos(-1.d0)) + coef_gauss_j_mu_x_2 = (/ 0.26699573d0 /) + expo_gauss_j_mu_x_2 = (/ 11.71029824d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo - alpha = alpha_opt * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = beta_opt * mu_erf * mu_erf + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_x_2 = (/ 0.11627934d0 , 0.18708824d0 /) + expo_gauss_j_mu_x_2 = (/ 102.41386863d0, 6.36239771d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_x_2 = (/ 0.04947216d0 , 0.14116238d0, 0.12276501d0 /) + expo_gauss_j_mu_x_2 = (/ 635.29701766d0, 4.87696954d0, 33.36745891d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 
5) then + + coef_gauss_j_mu_x_2 = (/ 0.01461527d0 , 0.03257147d0 , 0.08831354d0 , 0.11411794d0 , 0.06858783d0 /) + expo_gauss_j_mu_x_2 = (/ 8.76554470d+03, 4.90224577d+02, 3.68267125d+00, 1.29663940d+01, 6.58240931d+01 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_x_2 = (/ 0.01347632d0 , 0.03929124d0 , 0.06289468d0 , 0.10702493d0 , 0.06999865d0 , 0.02558191d0 /) + expo_gauss_j_mu_x_2 = (/ 1.00000000d+04, 1.20900717d+02, 3.20346191d+00, 8.92157196d+00, 3.28119120d+01, 6.49045808d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + !alpha_opt = 2.d0 * expo_j_xmu(1) + !beta_opt = 2.d0 * expo_j_xmu(2) + + ! direct opt + alpha_opt = 3.52751759d0 + beta_opt = 1.26214809d0 - do i = 1, n_max_fit_slat - expo_gauss_j_mu_x_2(i) = expos(i) + beta - coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) + alpha = alpha_opt * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = beta_opt * mu_erf * mu_erf + + tmp = 1.d0 / dacos(-1.d0) + do i = 1, ng_fit_jast + expo_gauss_j_mu_x_2(i) = expos(i) + beta + coef_gauss_j_mu_x_2(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop + + endif + + tmp = 0.25d0 / (mu_erf * mu_erf) + do i = 1, ng_fit_jast + coef_gauss_j_mu_x_2(i) = tmp * coef_gauss_j_mu_x_2(i) enddo END_PROVIDER ! --- - BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (n_max_fit_slat)] + BEGIN_PROVIDER [double precision, expo_gauss_j_mu_1_erf, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_j_mu_1_erf, (ng_fit_jast)] BEGIN_DOC ! @@ -108,25 +240,90 @@ END_PROVIDER implicit none integer :: i double precision :: tmp - double precision :: expos(n_max_fit_slat), alpha, beta + double precision :: expos(ng_fit_jast), alpha, beta double precision :: alpha_opt, beta_opt - !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1) - !beta_opt = expo_j_xmu(2) + expo_gauss_1_erf_x(2) - - ! direct opt - alpha_opt = 2.87875632d0 - beta_opt = 1.34801003d0 + if(ng_fit_jast .eq. 1) then - tmp = -0.25d0 / (mu_erf * dsqrt(dacos(-1.d0))) + coef_gauss_j_mu_1_erf = (/ -0.47742461d0 /) + expo_gauss_j_mu_1_erf = (/ 8.72255696d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo - alpha = alpha_opt * mu_erf - call expo_fit_slater_gam(alpha, expos) - beta = beta_opt * mu_erf * mu_erf + elseif(ng_fit_jast .eq. 2) then + + coef_gauss_j_mu_1_erf = (/ -0.19342649d0, -0.34563835d0 /) + expo_gauss_j_mu_1_erf = (/ 78.66099999d0, 5.04324363d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_j_mu_1_erf = (/ -0.0802541d0 , -0.27019258d0, -0.20546681d0 /) + expo_gauss_j_mu_1_erf = (/ 504.53350764d0, 4.01408169d0, 26.5758329d0 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 
5) then + + coef_gauss_j_mu_1_erf = (/ -0.02330531d0 , -0.11888176d0 , -0.16476192d0 , -0.19874713d0 , -0.05889174d0 /) + expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 4.66067922d+01, 3.04359857d+00, 9.54726649d+00, 3.59796835d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_j_mu_1_erf = (/ -0.01865654d0 , -0.18319251d0 , -0.06543196d0 , -0.11522778d0 , -0.14825793d0 , -0.03327101d0 /) + expo_gauss_j_mu_1_erf = (/ 1.00000000d+04, 8.05593848d+00, 1.27986190d+02, 2.92674319d+01, 2.93583623d+00, 7.65609148d+02 /) + + tmp = mu_erf * mu_erf + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + !alpha_opt = expo_j_xmu(1) + expo_gauss_1_erf_x(1) + !beta_opt = expo_j_xmu(2) + expo_gauss_1_erf_x(2) + + ! direct opt + alpha_opt = 2.87875632d0 + beta_opt = 1.34801003d0 + + alpha = alpha_opt * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = beta_opt * mu_erf * mu_erf + + tmp = -1.d0 / dsqrt(dacos(-1.d0)) + do i = 1, ng_fit_jast + expo_gauss_j_mu_1_erf(i) = expos(i) + beta + coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop - do i = 1, n_max_fit_slat - expo_gauss_j_mu_1_erf(i) = expos(i) + beta - coef_gauss_j_mu_1_erf(i) = tmp * coef_fit_slat_gauss(i) + endif + + tmp = 0.25d0 / mu_erf + do i = 1, ng_fit_jast + coef_gauss_j_mu_1_erf(i) = tmp * coef_gauss_j_mu_1_erf(i) enddo END_PROVIDER diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f index d8b3c442..67d572e5 100644 --- a/src/ao_tc_eff_map/potential.irp.f +++ b/src/ao_tc_eff_map/potential.irp.f @@ -142,27 +142,96 @@ double precision function fit_1_erf_x(x) end - BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (n_max_fit_slat)] -&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (n_max_fit_slat)] - implicit none - BEGIN_DOC -! (1 - erf(mu*x))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2) -! -! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2) -! -! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians - END_DOC - integer :: i - double precision :: expos(n_max_fit_slat),alpha,beta - alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf - call expo_fit_slater_gam(alpha,expos) - beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf**2.d0 - do i = 1, n_max_fit_slat - expo_gauss_1_erf_x_2(i) = expos(i) + beta - coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i) - enddo +! --- + + BEGIN_PROVIDER [double precision, expo_gauss_1_erf_x_2, (ng_fit_jast)] +&BEGIN_PROVIDER [double precision, coef_gauss_1_erf_x_2, (ng_fit_jast)] + + BEGIN_DOC + ! (1 - erf(mu*x))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * x^2) + ! + ! This is based on a fit of (1 - erf(mu*x)) by exp(-alpha * x) exp(-beta*mu^2x^2) + ! + ! and the slater function exp(-alpha * x) is fitted with n_max_fit_slat gaussians + END_DOC + + implicit none + integer :: i + double precision :: expos(ng_fit_jast), alpha, beta, tmp + + if(ng_fit_jast .eq. 1) then + + coef_gauss_1_erf_x_2 = (/ 0.85345277d0 /) + expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 
2) then + + coef_gauss_1_erf_x_2 = (/ 0.31030624d0 , 0.64364964d0 /) + expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 3) then + + coef_gauss_1_erf_x_2 = (/ 0.33206082d0 , 0.52347449d0, 0.12605012d0 /) + expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 5) then + + coef_gauss_1_erf_x_2 = (/ 0.02956716d0, 0.17025555d0, 0.32774114d0, 0.39034764d0, 0.07822781d0 /) + expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 6) then + + coef_gauss_1_erf_x_2 = (/ 0.18331042d0 , 0.10971118d0 , 0.29949169d0 , 0.34853132d0 , 0.0394275d0 , 0.01874444d0 /) + expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /) + + tmp = mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i) + enddo + + elseif(ng_fit_jast .eq. 20) then + + ASSERT(n_max_fit_slat == 20) + + alpha = 2.d0 * expos_slat_gauss_1_erf_x(1) * mu_erf + call expo_fit_slater_gam(alpha, expos) + beta = 2.d0 * expos_slat_gauss_1_erf_x(2) * mu_erf * mu_erf + do i = 1, n_max_fit_slat + expo_gauss_1_erf_x_2(i) = expos(i) + beta + coef_gauss_1_erf_x_2(i) = coef_fit_slat_gauss(i) + enddo + + else + + print *, ' not implemented yet' + stop + + endif + END_PROVIDER +! --- + double precision function fit_1_erf_x_2(x) implicit none double precision, intent(in) :: x diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f index 89f46a05..31cf0624 100644 --- a/src/bi_ort_ints/total_twoe_pot.irp.f +++ b/src/bi_ort_ints/total_twoe_pot.irp.f @@ -45,6 +45,9 @@ BEGIN_PROVIDER [double precision, ao_two_e_tc_tot, (ao_num, ao_num, ao_num, ao_n ! ao_non_hermit_term_chemist(k,i,l,j) = < k l | [erf( mu r12) - 1] d/d_r12 | i j > on the AO basis integral_nsym = ao_non_hermit_term_chemist(k,i,l,j) + !print *, ' sym integ = ', integral_sym + !print *, ' non-sym integ = ', integral_nsym + ao_two_e_tc_tot(k,i,l,j) = integral_sym + integral_nsym !write(111,*) ao_two_e_tc_tot(k,i,l,j) enddo diff --git a/src/bi_ortho_mos/overlap.irp.f b/src/bi_ortho_mos/overlap.irp.f index 09cec15f..d7f45c94 100644 --- a/src/bi_ortho_mos/overlap.irp.f +++ b/src/bi_ortho_mos/overlap.irp.f @@ -58,7 +58,7 @@ accu_nd = accu_nd/dble(mo_num**2-mo_num) if(dabs(accu_d-1.d0).gt.1.d-10.or.dabs(accu_nd).gt.1.d-10)then print*,'Warning !!!' - print*,'Average trace of overlap_bi_ortho is different from 1 by ', accu_d + print*,'Average trace of overlap_bi_ortho is different from 1 by ', dabs(accu_d-1.d0) print*,'And bi orthogonality is off by an average of ',accu_nd print*,'****************' print*,'Overlap matrix betwee mo_l_coef and mo_r_coef ' @@ -76,67 +76,85 @@ END_PROVIDER ! --- - BEGIN_PROVIDER [ double precision, overlap_mo_r, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, overlap_mo_l, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! 
overlap_mo_r_mo(j,i) = - END_DOC - integer :: i,j,p,q - overlap_mo_r= 0.d0 - overlap_mo_l= 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) - overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) - enddo + + BEGIN_DOC + ! overlap_mo_r_mo(j,i) = + END_DOC + + implicit none + integer :: i, j, p, q + + overlap_mo_r = 0.d0 + overlap_mo_l = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do p = 1, ao_num + do q = 1, ao_num + overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) + overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p) + enddo + enddo enddo - enddo - enddo + enddo + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, overlap_mo_r_mo, (mo_num, mo_num)] &BEGIN_PROVIDER [ double precision, overlap_mo_l_mo, (mo_num, mo_num)] - implicit none - BEGIN_DOC -! overlap_mo_r_mo(j,i) = - END_DOC - integer :: i,j,p,q - overlap_mo_r_mo = 0.d0 - overlap_mo_l_mo = 0.d0 - do i = 1, mo_num - do j = 1, mo_num - do p = 1, ao_num - do q = 1, ao_num - overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p) - overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p) - enddo + + BEGIN_DOC + ! overlap_mo_r_mo(j,i) = + END_DOC + + implicit none + integer :: i, j, p, q + + overlap_mo_r_mo = 0.d0 + overlap_mo_l_mo = 0.d0 + do i = 1, mo_num + do j = 1, mo_num + do p = 1, ao_num + do q = 1, ao_num + overlap_mo_r_mo(j,i) += mo_coef(p,j) * mo_r_coef(q,i) * ao_overlap(q,p) + overlap_mo_l_mo(j,i) += mo_coef(p,j) * mo_l_coef(q,i) * ao_overlap(q,p) + enddo + enddo enddo - enddo - enddo + enddo + END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, angle_left_right, (mo_num)] &BEGIN_PROVIDER [ double precision, max_angle_left_right] - implicit none - BEGIN_DOC + + BEGIN_DOC ! angle_left_right(i) = angle between the left-eigenvector chi_i and the right-eigenvector phi_i - END_DOC - integer :: i,j - double precision :: left,right,arg - do i = 1, mo_num - left = overlap_mo_l(i,i) - right = overlap_mo_r(i,i) - arg = min(overlap_bi_ortho(i,i)/(left*right),1.d0) - arg = max(arg,-1.d0) - angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0) - enddo - double precision :: angle(mo_num) - angle(1:mo_num) = dabs(angle_left_right(1:mo_num)) - max_angle_left_right = maxval(angle) + END_DOC + + implicit none + integer :: i, j + double precision :: left, right, arg + double precision :: angle(mo_num) + + do i = 1, mo_num + left = overlap_mo_l(i,i) + right = overlap_mo_r(i,i) + arg = min(overlap_bi_ortho(i,i)/(left*right),1.d0) + arg = max(arg, -1.d0) + angle_left_right(i) = dacos(arg) * 180.d0/dacos(-1.d0) + enddo + + angle(1:mo_num) = dabs(angle_left_right(1:mo_num)) + max_angle_left_right = maxval(angle) + END_PROVIDER +! 
--- + diff --git a/src/cipsi/selection.irp.f b/src/cipsi/selection.irp.f index 4a9ed5f3..62d7c52c 100644 --- a/src/cipsi/selection.irp.f +++ b/src/cipsi/selection.irp.f @@ -750,7 +750,10 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d if (delta_E < 0.d0) then tmp = -tmp endif + + !e_pert(istate) = alpha_h_psi * alpha_h_psi / (E0(istate) - Hii) e_pert(istate) = 0.5d0 * (tmp - delta_E) + if (dabs(alpha_h_psi) > 1.d-4) then coef(istate) = e_pert(istate) / alpha_h_psi else diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f index 36d0120d..df1eb71d 100644 --- a/src/non_hermit_dav/biorthog.irp.f +++ b/src/non_hermit_dav/biorthog.irp.f @@ -252,7 +252,7 @@ end subroutine non_hrmt_real_diag_new ! --- -subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) +subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, eigval) BEGIN_DOC ! @@ -266,13 +266,14 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) implicit none integer, intent(in) :: n double precision, intent(in) :: A(n,n) + double precision, intent(in) :: thr_d, thr_nd integer, intent(out) :: n_real_eigv double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n) integer :: i, j integer :: n_good double precision :: thr, thr_cut, thr_diag, thr_norm - double precision :: accu_d, accu_nd, thr_d, thr_nd + double precision :: accu_d, accu_nd integer, allocatable :: list_good(:), iorder(:) double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:) @@ -327,9 +328,9 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) ! track & sort the real eigenvalues n_good = 0 - thr = 1.d-5 + thr = 1.d-3 do i = 1, n - print*, 'Re(i) + Im(i)', WR(i), WI(i) + print*, 'Re(i) + Im(i)', WR(i), WI(i) if(dabs(WI(i)) .lt. thr) then n_good += 1 else @@ -395,22 +396,22 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) ! ------------------------------------------------------------------------------------- ! check bi-orthogonality - thr_d = 1d-10 ! -7 - thr_nd = 1d-10 ! -7 + thr_diag = 10.d0 + thr_norm = 1d+10 allocate( S(n_real_eigv,n_real_eigv) ) - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.) - if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .lt. thr_d) ) then + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then print *, ' lapack vectors are normalized and bi-orthogonalized' deallocate(S) return - elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then + elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then print *, ' lapack vectors are not normalized but bi-orthogonalized' - call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.) call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.) 
@@ -429,17 +430,17 @@ subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval) call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) - !call impose_orthog_biorthog_degen_eigvec(n, eigval, leigvec, reigvec) + !call impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, eigval, leigvec, reigvec) !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec) ! --- - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .false.) if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then - call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.) + call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.) endif - call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .true.) + call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.) !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec) !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec) diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f index d79e25ba..53c62ce8 100644 --- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f +++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f @@ -356,6 +356,7 @@ subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval) ! Eigvalue(n) = WR(n) + i * WI(n) allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n)) Aw = A + !print *, ' matrix to diagonalize', Aw call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR) ! --- @@ -573,21 +574,22 @@ end subroutine non_hrmt_general_real_diag ! --- -subroutine impose_biorthog_qr(m, n, Vl, Vr) +subroutine impose_biorthog_qr(m, n, thr_d, thr_nd, Vl, Vr) implicit none - integer, intent(in) :: m, n + integer, intent(in) :: m, n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(inout) :: Vl(m,n), Vr(m,n) integer :: i, j integer :: LWORK, INFO - double precision :: accu_nd, accu_d, thr_nd, thr_d + double precision :: accu_nd, accu_d double precision, allocatable :: TAU(:), WORK(:) double precision, allocatable :: S(:,:), R(:,:), tmp(:,:) ! --- - call check_biorthog_binormalize(m, n, Vl, Vr, .false.) + call check_biorthog_binormalize(m, n, Vl, Vr, thr_d, thr_nd, .false.) ! --- @@ -609,9 +611,7 @@ subroutine impose_biorthog_qr(m, n, Vl, Vr) enddo accu_nd = dsqrt(accu_nd) - thr_d = 1d-10 - thr_nd = 1d-08 - if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n)) .lt. thr_d)) then + if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then print *, ' bi-orthogonal vectors without QR !' deallocate(S) return @@ -1011,7 +1011,7 @@ subroutine check_degen(n, m, eigval, leigvec, reigvec) double precision :: ei, ej, de, de_thr, accu_nd double precision, allocatable :: S(:,:) - de_thr = 1d-7 + de_thr = 1d-6 do i = 1, m-1 ei = eigval(i) @@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) double precision, allocatable :: S(:,:), tmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) - print *, ' apply SVD to orthogonalize vectors' + print *, ' apply SVD to orthogonalize & normalize weighted vectors' ! 
--- @@ -1097,7 +1097,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) , 0.d0, S, size(S, 1) ) deallocate(tmp) - print *, ' eigenvec overlap bef SVD: ' + print *, ' overlap bef SVD: ' do i = 1, m write(*, '(1000(F16.10,X))') S(i,:) enddo @@ -1160,7 +1160,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C) , 0.d0, S, size(S, 1) ) deallocate(tmp) - print *, ' eigenvec overlap aft SVD: ' + print *, ' overlap aft SVD: ' do i = 1, m write(*, '(1000(F16.10,X))') S(i,:) enddo @@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C) double precision, allocatable :: S(:,:), tmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) - print *, ' apply SVD to orthogonalize vectors' + print *, ' apply SVD to orthogonalize & normalize vectors' ! --- @@ -1213,6 +1213,7 @@ subroutine impose_orthog_svd(n, m, C) num_linear_dependencies = 0 do i = 1, m if(abs(D(i)) <= threshold) then + write(*,*) ' D(i) = ', D(i) D(i) = 0.d0 num_linear_dependencies += 1 else @@ -1268,7 +1269,7 @@ end subroutine impose_orthog_svd ! --- -subroutine impose_orthog_svd_overlap(n, m, C,overlap) +subroutine impose_orthog_svd_overlap(n, m, C, overlap) implicit none @@ -1278,27 +1279,27 @@ subroutine impose_orthog_svd_overlap(n, m, C,overlap) integer :: i, j, num_linear_dependencies double precision :: threshold - double precision, allocatable :: S(:,:), tmp(:,:),Stmp(:,:) + double precision, allocatable :: S(:,:), tmp(:,:), Stmp(:,:) double precision, allocatable :: U(:,:), Vt(:,:), D(:) print *, ' apply SVD to orthogonalize vectors' ! --- - allocate(S(m,m),Stmp(n,m)) - ! S = C.T x overlap x C - call dgemm( 'N', 'N', n, m, n, 1.d0 & + allocate(S(m,m), Stmp(n,m)) + call dgemm( 'N', 'N', n, m, n, 1.d0 & , overlap, size(overlap, 1), C, size(C, 1) & , 0.d0, Stmp, size(Stmp, 1) ) - call dgemm( 'T', 'N', m, m, n, 1.d0 & + call dgemm( 'T', 'N', m, m, n, 1.d0 & , C, size(C, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) -! print *, ' eigenvec overlap bef SVD: ' -! do i = 1, m -! write(*, '(1000(F16.10,X))') S(i,:) -! enddo + print *, ' eigenvec overlap bef SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo ! --- @@ -1347,23 +1348,23 @@ subroutine impose_orthog_svd_overlap(n, m, C,overlap) ! --- - allocate(S(m,m)) - - ! S = C.T x C - call dgemm( 'T', 'N', m, m, n, 1.d0 & - , C, size(C, 1), C, size(C, 1) & + ! S = C.T x overlap x C + allocate(S(m,m), Stmp(n,m)) + call dgemm( 'N', 'N', n, m, n, 1.d0 & + , overlap, size(overlap, 1), C, size(C, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'T', 'N', m, m, n, 1.d0 & + , C, size(C, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) -! print *, ' eigenvec overlap aft SVD: ' -! do i = 1, m -! write(*, '(1000(F16.10,X))') S(i,:) -! enddo - + print *, ' eigenvec overlap aft SVD: ' + do i = 1, m + write(*, '(1000(F16.10,X))') S(i,:) + enddo deallocate(S) - ! --- - -end subroutine impose_orthog_svd +end subroutine impose_orthog_svd_overlap ! --- @@ -1379,7 +1380,7 @@ subroutine impose_orthog_GramSchmidt(n, m, C) double precision, allocatable :: S(:,:) print *, '' - print *, ' apply Gram-Schmidt to orthogonalize vectors' + print *, ' apply Gram-Schmidt to orthogonalize & normalize vectors' print *, '' ! --- @@ -1663,22 +1664,19 @@ end subroutine get_halfinv_svd ! 
--- -subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) +subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot) implicit none integer, intent(in) :: n, m logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd double precision, intent(inout) :: Vl(n,m), Vr(n,m) integer :: i, j - double precision :: thr_d, thr_nd double precision :: accu_d, accu_nd, s_tmp double precision, allocatable :: S(:,:) - thr_d = 1d-6 - thr_nd = 1d-7 - print *, ' check bi-orthonormality' ! --- @@ -1694,11 +1692,13 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) ! S(i,i) = -1 do i = 1, m - if( (S(i,i) + 1.d0) .lt. thr_d ) then + if(S(i,i) .lt. 0.d0) then + !if( (S(i,i) + 1.d0) .lt. thr_d ) then do j = 1, n Vl(j,i) = -1.d0 * Vl(j,i) enddo - S(i,i) = 1.d0 + !S(i,i) = 1.d0 + S(i,i) = -S(i,i) endif enddo @@ -1713,7 +1713,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) + accu_nd = dsqrt(accu_nd) / dble(m) print*, ' diag acc: ', accu_d print*, ' nondiag acc: ', accu_nd @@ -1730,6 +1730,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) Vr(j,i) = Vr(j,i) * s_tmp enddo endif + enddo endif @@ -1755,7 +1756,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) + accu_nd = dsqrt(accu_nd) / dble(m) print *, ' diag acc: ', accu_d print *, ' nondiag acc: ', accu_nd @@ -1774,22 +1775,19 @@ end subroutine check_biorthog_binormalize ! --- -subroutine check_weighted_biorthog(n, m, W, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) +subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_nd, S, stop_ifnot) implicit none integer, intent(in) :: n, m double precision, intent(in) :: Vl(n,m), Vr(n,m), W(n,n) + double precision, intent(in) :: thr_d, thr_nd logical, intent(in) :: stop_ifnot double precision, intent(out) :: accu_d, accu_nd, S(m,m) integer :: i, j - double precision :: thr_d, thr_nd double precision, allocatable :: SS(:,:), tmp(:,:) - thr_d = 1d-6 - thr_nd = 1d-08 - print *, ' check weighted bi-orthogonality' ! --- @@ -1841,22 +1839,19 @@ end subroutine check_weighted_biorthog ! --- -subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) +subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_ifnot) implicit none integer, intent(in) :: n, m double precision, intent(in) :: Vl(n,m), Vr(n,m) logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd double precision, intent(out) :: accu_d, accu_nd, S(m,m) integer :: i, j - double precision :: thr_d, thr_nd double precision, allocatable :: SS(:,:) - thr_d = 1d-6 - thr_nd = 1d-08 - print *, ' check bi-orthogonality' ! --- @@ -1880,7 +1875,7 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot) endif enddo enddo - accu_nd = dsqrt(accu_nd) + accu_nd = dsqrt(accu_nd) / dble(m) print *, ' accu_nd = ', accu_nd print *, ' accu_d = ', dabs(accu_d-dble(m))/dble(m) @@ -1988,7 +1983,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) do i = 1, n if(deg_num(i).gt.1) then - print *, ' degen on', i, deg_num(i) + print *, ' degen on', i, deg_num(i), e0(i) endif enddo @@ -2011,6 +2006,8 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) call impose_orthog_svd(n, m, L) call impose_orthog_svd(n, m, R) + !call impose_orthog_GramSchmidt(n, m, L) + !call impose_orthog_GramSchmidt(n, m, R) ! 
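The degenerate-block routines in this patch (impose_biorthog_degen_eigvec and friends here, fix_right_to_one and routine_save_rotated_mos further down) all start from the same grouping step: eigenvalues closer than a threshold are collected into one block, deg_num(i) carries the block size on its first member and 0 on the others, and only blocks with more than one member are reprocessed. A stand-alone sketch of that grouping (de_thr is whatever the caller chooses; 1d-6 is the value used further down):

subroutine group_degenerate(m, e0, de_thr, deg_num)
  implicit none
  integer,          intent(in)  :: m
  double precision, intent(in)  :: e0(m), de_thr
  integer,          intent(out) :: deg_num(m)
  integer :: i, j

  deg_num(1:m) = 1
  do i = 1, m-1
    if(deg_num(i) .eq. 0) cycle          ! already absorbed in an earlier block
    do j = i+1, m
      if(dabs(e0(i) - e0(j)) .lt. de_thr) then
        deg_num(i) = deg_num(i) + 1      ! i carries the size of its block
        deg_num(j) = 0                   ! j belongs to the block starting at i
      endif
    enddo
  enddo

end subroutine group_degenerate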
--- @@ -2029,7 +2026,7 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0) call impose_biorthog_svd(n, m, L, R) - !call impose_biorthog_qr(n, m, L, R) + !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) ! --- @@ -2047,11 +2044,12 @@ end subroutine impose_biorthog_degen_eigvec ! --- -subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0) +subroutine impose_orthog_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, L0, R0) implicit none integer, intent(in) :: n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(in) :: e0(n) double precision, intent(inout) :: L0(n,n), R0(n,n) @@ -2116,12 +2114,12 @@ subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0) ! --- - call impose_biorthog_qr(n, m, L, R) + call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) allocate(S(m,m)) - call check_biorthog(n, m, L, R, accu_d, accu_nd, S, .true.) - !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, .true.) - !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, .false.) + call check_biorthog(n, m, L, R, accu_d, accu_nd, S, thr_d, thr_nd, .true.) + !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, thr_d, thr_nd, .true.) + !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, thr_d, thr_nd, .false.) deallocate(S) ! --- @@ -2140,11 +2138,12 @@ end subroutine impose_orthog_biorthog_degen_eigvec ! --- -subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) +subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0, R0) implicit none integer, intent(in) :: n + double precision, intent(in) :: thr_d, thr_nd double precision, intent(in) :: e0(n), W0(n,n), C0(n,n) double precision, intent(inout) :: L0(n,n), R0(n,n) @@ -2255,7 +2254,7 @@ subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0) call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root) if(complex_root)then call impose_biorthog_svd(n, m, L, R) - !call impose_biorthog_qr(n, m, L, R) + !call impose_biorthog_qr(n, m, thr_d, thr_nd, L, R) else call bi_ortho_s_inv_half(m, L, R, S_inv_half) endif @@ -2502,8 +2501,286 @@ end subroutine impose_biorthog_svd ! --- +subroutine impose_weighted_biorthog_qr(m, n, thr_d, thr_nd, Vl, W, Vr) -subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: thr_d, thr_nd + double precision, intent(inout) :: Vl(m,n), W(m,m), Vr(m,n) + + integer :: i, j + integer :: LWORK, INFO + double precision :: accu_nd, accu_d + double precision, allocatable :: TAU(:), WORK(:) + double precision, allocatable :: S(:,:), R(:,:), tmp(:,:), Stmp(:,:) + + + call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.) + + ! --- + + allocate(Stmp(n,m), S(n,n)) + call dgemm( 'T', 'N', n, m, m, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', n, n, m, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + + accu_nd = 0.d0 + accu_d = 0.d0 + do i = 1, n + do j = 1, n + if(i==j) then + accu_d += S(j,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) + + if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n))/dble(n) .lt. thr_d)) then + print *, ' bi-orthogonal vectors without QR !' + deallocate(S) + return + endif + + ! ------------------------------------------------------------------------------------- + ! QR factorization of S: S = Q x R + + + print *, ' apply QR decomposition ...' 
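+ ! Rationale: with S = Vl.T x W x Vr = Q x R (QR factorization computed below
+ ! with dgeqrf/dorgqr), updating Vr <- Vr x inv(R) and transforming the left
+ ! set as Vl <- Vl x Q gives Vl.T x W x Vr = Q.T x (Q x R) x inv(R) = identity,
+ ! i.e. the two sets become bi-orthonormal with respect to the metric W.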
+ + allocate( TAU(n), WORK(1) ) + + LWORK = -1 + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dgeqrf failed !!', INFO + stop + endif + + ! save the upper triangular R + allocate( R(n,n) ) + R(:,:) = S(:,:) + + ! get Q + LWORK = -1 + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + LWORK = max(n, int(WORK(1))) + deallocate(WORK) + + allocate( WORK(LWORK) ) + call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO) + if(INFO .ne. 0) then + print*,'dorgqr failed !!', INFO + stop + endif + + deallocate( WORK, TAU ) + + ! + ! ------------------------------------------------------------------------------------- + + ! --- + + ! ------------------------------------------------------------------------------------- + ! get bi-orhtog left & right vectors: + ! Vr' = Vr x inv(R) + ! Vl' = inv(Q) x Vl = Q.T x Vl + + ! Q.T x Vl, where Q = S + + allocate( tmp(n,m) ) + call dgemm( 'T', 'T', n, m, n, 1.d0 & + , S, size(S, 1), Vl, size(Vl, 1) & + , 0.d0, tmp, size(tmp, 1) ) + + do i = 1, n + do j = 1, m + Vl(j,i) = tmp(i,j) + enddo + enddo + deallocate(tmp) + + ! --- + + ! inv(R) + !print *, ' inversing upper triangular matrix ...' + call dtrtri("U", "N", n, R, n, INFO) + if(INFO .ne. 0) then + print*,'dtrtri failed !!', INFO + stop + endif + !print *, ' inversing upper triangular matrix OK' + + do i = 1, n-1 + do j = i+1, n + R(j,i) = 0.d0 + enddo + enddo + + !print *, ' inv(R):' + !do i = 1, n + ! write(*, '(1000(F16.10,X))') R(i,:) + !enddo + + ! Vr x inv(R) + allocate( tmp(m,n) ) + call dgemm( 'N', 'N', m, n, n, 1.d0 & + , Vr, size(Vr, 1), R, size(R, 1) & + , 0.d0, tmp, size(tmp, 1) ) + deallocate( R ) + + do i = 1, n + do j = 1, m + Vr(j,i) = tmp(j,i) + enddo + enddo + deallocate(tmp) + + call check_weighted_biorthog_binormalize(m, n, Vl, W, Vr, thr_d, thr_nd, .false.) + + return +end subroutine impose_weighted_biorthog_qr + +! --- + +subroutine check_weighted_biorthog_binormalize(n, m, Vl, W, Vr, thr_d, thr_nd, stop_ifnot) + + implicit none + + integer, intent(in) :: n, m + logical, intent(in) :: stop_ifnot + double precision, intent(in) :: thr_d, thr_nd + double precision, intent(inout) :: Vl(n,m), W(n,n), Vr(n,m) + + integer :: i, j + double precision :: accu_d, accu_nd, s_tmp + double precision, allocatable :: S(:,:), Stmp(:,:) + + print *, ' check weighted bi-orthonormality' + + ! --- + + allocate(Stmp(m,n), S(m,m)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + !print *, ' overlap matrix before:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + ! S(i,i) = -1 + do i = 1, m + if( (S(i,i) + 1.d0) .lt. thr_d ) then + do j = 1, n + Vl(j,i) = -1.d0 * Vl(j,i) + enddo + S(i,i) = 1.d0 + endif + enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) / dble(m) + print*, ' diag acc: ', accu_d + print*, ' nondiag acc: ', accu_nd + + ! --- + + if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. 
thr_d) ) then + + do i = 1, m + print *, i, S(i,i) + if(dabs(S(i,i) - 1.d0) .gt. thr_d) then + s_tmp = 1.d0 / dsqrt(S(i,i)) + do j = 1, n + Vl(j,i) = Vl(j,i) * s_tmp + Vr(j,i) = Vr(j,i) * s_tmp + enddo + endif + enddo + + endif + + ! --- + + allocate(Stmp(m,n)) + call dgemm( 'T', 'N', m, n, n, 1.d0 & + , Vl, size(Vl, 1), W, size(W, 1) & + , 0.d0, Stmp, size(Stmp, 1) ) + call dgemm( 'N', 'N', m, m, n, 1.d0 & + , Stmp, size(Stmp, 1), Vr, size(Vr, 1) & + , 0.d0, S, size(S, 1) ) + deallocate(Stmp) + !print *, ' overlap matrix after:' + !do i = 1, m + ! write(*,'(1000(F16.10,X))') S(i,:) + !enddo + + accu_d = 0.d0 + accu_nd = 0.d0 + do i = 1, m + do j = 1, m + if(i==j) then + accu_d = accu_d + S(i,i) + else + accu_nd = accu_nd + S(j,i) * S(j,i) + endif + enddo + enddo + accu_nd = dsqrt(accu_nd) / dble(m) + print *, ' diag acc: ', accu_d + print *, ' nondiag acc: ', accu_nd + + deallocate(S) + + ! --- + + if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then + print *, accu_nd, thr_nd + print *, dabs(accu_d-dble(m))/dble(m), thr_d + print *, ' weighted biorthog_binormalize failed !' + stop + endif + +end subroutine check_weighted_biorthog_binormalize + +! --- + +subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R) implicit none @@ -2527,6 +2804,7 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) call dgemm( 'T', 'N', m, m, n, 1.d0 & , L, size(L, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) print *, ' overlap bef SVD: ' do i = 1, m @@ -2598,10 +2876,7 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) ! --- - allocate(S(m,m)) -! call dgemm( 'T', 'N', m, m, n, 1.d0 & -! , L, size(L, 1), R, size(R, 1) & -! , 0.d0, S, size(S, 1) ) + allocate(S(m,m),Stmp(n,m)) ! S = C.T x overlap x C call dgemm( 'N', 'N', n, m, n, 1.d0 & , overlap, size(overlap, 1), R, size(R, 1) & @@ -2609,6 +2884,7 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) call dgemm( 'T', 'N', m, m, n, 1.d0 & , L, size(L, 1), Stmp, size(Stmp, 1) & , 0.d0, S, size(S, 1) ) + deallocate(Stmp) print *, ' overlap aft SVD with overlap: ' do i = 1, m @@ -2616,9 +2892,8 @@ subroutine impose_biorthog_svd_overlap(n, m, overlap, L, R) enddo deallocate(S) - ! --- - -end subroutine impose_biorthog_svd + return +end subroutine impose_weighted_biorthog_svd ! --- diff --git a/src/tc_bi_ortho/print_he_tc_energy.irp.f b/src/tc_bi_ortho/print_he_tc_energy.irp.f new file mode 100644 index 00000000..84d34bcb --- /dev/null +++ b/src/tc_bi_ortho/print_he_tc_energy.irp.f @@ -0,0 +1,142 @@ + +! --- + +program print_he_tc_energy + + implicit none + + call print_overlap() + + call print_energy1() + +end + +! --- + +subroutine print_overlap() + + implicit none + integer :: i, j, k, l + double precision :: S_ij + + print *, ' ao_overlap:' + do i = 1, ao_num + do j = 1, ao_num + print *, j, i, ao_overlap(j,i) + enddo + enddo + + print *, ' mo_overlap:' + do i = 1, mo_num + do j = 1, mo_num + + S_ij = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + S_ij += mo_l_coef(k,i) * ao_overlap(k,l) * mo_r_coef(l,j) + enddo + enddo + + print *, i, j, S_ij + enddo + enddo + +end subroutine print_overlap + +! --- + +subroutine print_energy1() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp, e_ns + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! 
< phi_1 phi_1 | h1 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1) + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_1 | h2 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + e_tmp += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + e_ns = 0.d0 + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ao_two_e_tc_tot(i,j,k,l) = + e += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_two_e_tc_tot(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1) + + e_ns += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_non_hermit_term_chemist(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1) + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_1 | phi_1 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + n_tmp += mo_l_coef(i,1) * ao_overlap(i,j) * mo_r_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + e_ns = e_ns / n + + print *, ' tc energy = ', e + print *, ' non-sym energy = ', e_ns + +end subroutine print_energy1 + +! --- + + diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f index 4bfb2da3..a56a432f 100644 --- a/src/tc_bi_ortho/slater_tc_3e.irp.f +++ b/src/tc_bi_ortho/slater_tc_3e.irp.f @@ -66,7 +66,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aab = ',accu + !print*,'aab = ',accu ! beta/beta/alpha three-body accu = 0.d0 @@ -83,7 +83,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'abb = ',accu + !print*,'abb = ',accu ! alpha/alpha/alpha three-body accu = 0.d0 @@ -99,7 +99,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'aaa = ',accu + !print*,'aaa = ',accu ! beta/beta/beta three-body accu = 0.d0 @@ -115,7 +115,7 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree) enddo enddo enddo - print*,'bbb = ',accu + !print*,'bbb = ',accu endif end diff --git a/src/tc_bi_ortho/tc_hmat.irp.f b/src/tc_bi_ortho/tc_hmat.irp.f index bf1388e5..44e27e7c 100644 --- a/src/tc_bi_ortho/tc_hmat.irp.f +++ b/src/tc_bi_ortho/tc_hmat.irp.f @@ -18,6 +18,10 @@ do j = 1, N_det ! 
< J | Htilde | I > call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot) + + !print *, ' hmono = ', hmono + !print *, ' htwoe = ', htwoe + !print *, ' hthree = ', hthree htilde_matrix_elmt_bi_ortho(j,i) = htot enddo enddo diff --git a/src/tc_bi_ortho/tc_natorb.irp.f b/src/tc_bi_ortho/tc_natorb.irp.f index 4144fcad..b2ccb091 100644 --- a/src/tc_bi_ortho/tc_natorb.irp.f +++ b/src/tc_bi_ortho/tc_natorb.irp.f @@ -1,125 +1,150 @@ - BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)] +! --- + + BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)] + + BEGIN_DOC + ! + ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) + ! natorb_tc_leigvec_mo : LEFT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) + ! natorb_tc_eigval : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNINING :: can be negative !! + ! + END_DOC + implicit none - BEGIN_DOC - ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) - ! natorb_tc_leigvec_mo : LEFT eigenvectors of the ground state transition matrix (equivalent of natural orbitals) - ! natorb_tc_eigval : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNINING :: can be negative !! - END_DOC - double precision, allocatable :: dm_tmp(:,:),fock_diag(:) - double precision :: thr_deg - integer :: i,j,k,n_real - allocate( dm_tmp(mo_num,mo_num),fock_diag(mo_num)) + integer :: i, j, k, n_real + double precision :: thr_d, thr_nd, thr_deg, accu + double precision :: accu_d, accu_nd + double precision, allocatable :: dm_tmp(:,:), fock_diag(:) + + allocate(dm_tmp(mo_num,mo_num), fock_diag(mo_num)) + dm_tmp(:,:) = -tc_transition_matrix(:,:,1,1) - print*,'dm_tmp' + + print *, ' dm_tmp' do i = 1, mo_num - fock_diag(i) = fock_matrix_tc_mo_tot(i,i) - write(*,'(100(F16.10,X))')-dm_tmp(:,i) + fock_diag(i) = fock_matrix_tc_mo_tot(i,i) + write(*, '(100(F16.10,X))') -dm_tmp(:,i) enddo + + thr_d = 1.d-6 + thr_nd = 1.d-6 thr_deg = 1.d-3 - call diag_mat_per_fock_degen(fock_diag,dm_tmp,mo_num,thr_deg,& - natorb_tc_leigvec_mo,natorb_tc_reigvec_mo,& - natorb_tc_eigval) + call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & + , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval) ! call non_hrmt_bieig( mo_num, dm_tmp& ! , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& ! 
, n_real, natorb_tc_eigval ) - double precision :: accu + accu = 0.d0 do i = 1, n_real - print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) - accu += -natorb_tc_eigval(i) + print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i) + accu += -natorb_tc_eigval(i) enddo - print*,'accu = ',accu + print *, ' accu = ', accu + dm_tmp = 0.d0 do i = 1, n_real - accu = 0.d0 - do k = 1, mo_num - accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i) - enddo - accu = 1.d0/dsqrt(dabs(accu)) - natorb_tc_reigvec_mo(:,i) *= accu - natorb_tc_leigvec_mo(:,i) *= accu - do j = 1, n_real + accu = 0.d0 do k = 1, mo_num - dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j) + accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i) + enddo + accu = 1.d0/dsqrt(dabs(accu)) + natorb_tc_reigvec_mo(:,i) *= accu + natorb_tc_leigvec_mo(:,i) *= accu + do j = 1, n_real + do k = 1, mo_num + dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j) + enddo enddo - enddo enddo - double precision :: accu_d, accu_nd - accu_d = 0.d0 + + accu_d = 0.d0 accu_nd = 0.d0 do i = 1, mo_num - accu_d += dm_tmp(i,i) - ! write(*,'(100(F16.10,X))')dm_tmp(:,i) - do j = 1, mo_num - if(i==j)cycle - accu_nd += dabs(dm_tmp(j,i)) - enddo + accu_d += dm_tmp(i,i) + !write(*,'(100(F16.10,X))')dm_tmp(:,i) + do j = 1, mo_num + if(i==j)cycle + accu_nd += dabs(dm_tmp(j,i)) + enddo enddo - print*,'Trace of the overlap between TC natural orbitals ',accu_d - print*,'L1 norm of extra diagonal elements of overlap matrix ',accu_nd + print *, ' Trace of the overlap between TC natural orbitals ', accu_d + print *, ' L1 norm of extra diagonal elements of overlap matrix ', accu_nd + + deallocate(dm_tmp, fock_diag) +END_PROVIDER + +! --- - END_PROVIDER - - BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)] - &BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)] + BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)] +&BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)] + implicit none - integer ::i,j,k - print*,'Diagonal elements of the Fock matrix before ' - do i = 1, mo_num - write(*,*)i,Fock_matrix_tc_mo_tot(i,i) - enddo + integer :: i,j,k + integer, allocatable :: iorder(:) double precision, allocatable :: fock_diag(:) + + print *, ' Diagonal elements of the Fock matrix before ' + + do i = 1, mo_num + write(*,*) i, Fock_matrix_tc_mo_tot(i,i) + enddo + allocate(fock_diag(mo_num)) fock_diag = 0.d0 do i = 1, mo_num - fock_diag(i) = 0.d0 - do j = 1, mo_num - do k = 1, mo_num - fock_diag(i) += natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i) + fock_diag(i) = 0.d0 + do j = 1, mo_num + do k = 1, mo_num + fock_diag(i) += natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i) + enddo enddo - enddo enddo - integer, allocatable :: iorder(:) + allocate(iorder(mo_num)) do i = 1, mo_num iorder(i) = i enddo - call dsort(fock_diag,iorder,mo_num) - print*,'Diagonal elements of the Fock matrix after ' + call dsort(fock_diag, iorder, mo_num) + + print *, ' Diagonal elements of the Fock matrix after ' do i = 1, mo_num - write(*,*)i,fock_diag(i) + write(*,*) i, fock_diag(i) enddo + deallocate(fock_diag) + do i = 1, mo_num - fock_diag_sorted_v_natorb(i) = natorb_tc_eigval(iorder(i)) - do j = 1, mo_num - 
fock_diag_sorted_r_natorb(j,i) = natorb_tc_reigvec_mo(j,iorder(i)) - fock_diag_sorted_l_natorb(j,i) = natorb_tc_leigvec_mo(j,iorder(i)) - enddo + fock_diag_sorted_v_natorb(i) = natorb_tc_eigval(iorder(i)) + do j = 1, mo_num + fock_diag_sorted_r_natorb(j,i) = natorb_tc_reigvec_mo(j,iorder(i)) + fock_diag_sorted_l_natorb(j,i) = natorb_tc_leigvec_mo(j,iorder(i)) + enddo enddo + deallocate(iorder) - END_PROVIDER +END_PROVIDER +! --- + BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)] +&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)] +&BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ] - BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)] - &BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)] - &BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ] + BEGIN_DOC + ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP + ! + ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo + END_DOC - BEGIN_DOC - ! EIGENVECTORS OF FOCK MATRIX ON THE AO BASIS and their OVERLAP - ! - ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo - END_DOC - - implicit none - integer :: i, j, k, q, p - double precision :: accu, accu_d - double precision, allocatable :: tmp(:,:) + implicit none + integer :: i, j, k, q, p + double precision :: accu, accu_d + double precision, allocatable :: tmp(:,:) ! ! MO_R x R diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg index 4c35d88c..e506d1cc 100644 --- a/src/tc_keywords/EZFIO.cfg +++ b/src/tc_keywords/EZFIO.cfg @@ -118,7 +118,6 @@ doc: type of 1-body Jastrow interface: ezfio, provider, ocaml default: 0 - [thr_degen_tc] type: Threshold doc: Threshold to determine if two orbitals are degenerate in TCSCF in order to avoid random quasi orthogonality between the right- and left-eigenvector for the same eigenvalue @@ -130,3 +129,10 @@ type: logical doc: If |true|, maximize the overlap between orthogonalized left- and right eigenvectors interface: ezfio,provider,ocaml default: False + +[ng_fit_jast] +type: integer +doc: nb of Gaussians used to fit Jastrow fcts +interface: ezfio,provider,ocaml +default: 6 + diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f index 00558de6..856b7382 100644 --- a/src/tc_scf/diago_bi_ort_tcfock.irp.f +++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f @@ -11,20 +11,24 @@ integer :: n_real_tc integer :: i, k, l double precision :: accu_d, accu_nd, accu_tmp + double precision :: thr_d, thr_nd double precision :: norm double precision, allocatable :: eigval_right_tmp(:) + thr_d = 1d-6 + thr_nd = 1d-6 + allocate( eigval_right_tmp(mo_num) ) PROVIDE Fock_matrix_tc_mo_tot - call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot & - , fock_tc_leigvec_mo, fock_tc_reigvec_mo & - , n_real_tc, eigval_right_tmp ) + call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd & + , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + , n_real_tc, eigval_right_tmp ) !if(max_ov_tc_scf)then -! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot & -! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & -! , n_real_tc, eigval_right_tmp ) + ! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd & + ! , fock_tc_leigvec_mo, fock_tc_reigvec_mo & + ! , n_real_tc, eigval_right_tmp ) !else ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot & ! 
, fock_tc_leigvec_mo, fock_tc_reigvec_mo & @@ -59,17 +63,17 @@ else accu_tmp = overlap_fock_tc_eigvec_mo(k,i) accu_nd += accu_tmp * accu_tmp - if(dabs(overlap_fock_tc_eigvec_mo(k,i)).gt.1.d-10)then - print*,'k,i',k,i,overlap_fock_tc_eigvec_mo(k,i) + if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then + print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i) endif endif enddo enddo accu_nd = dsqrt(accu_nd)/accu_d - if( accu_nd .gt. 1d-8 ) then + if(accu_nd .gt. thr_nd) then print *, ' bi-orthog failed' - print*,'accu_nd MO = ', accu_nd + print*,'accu_nd MO = ', accu_nd, thr_nd print*,'overlap_fock_tc_eigvec_mo = ' do i = 1, mo_num write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:) @@ -77,13 +81,13 @@ stop endif - if( dabs(accu_d - dble(mo_num)) .gt. 1e-7 ) then + if( dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d ) then print *, 'mo_num = ', mo_num - print *, 'accu_d MO = ', accu_d + print *, 'accu_d MO = ', accu_d, thr_d print *, 'normalizing vectors ...' do i = 1, mo_num norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i))) - if( norm.gt.1e-7 ) then + if(norm .gt. thr_d) then do k = 1, mo_num fock_tc_reigvec_mo(k,i) *= 1.d0/norm fock_tc_leigvec_mo(k,i) *= 1.d0/norm diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f index ca500fbb..6b1c1d77 100644 --- a/src/tc_scf/fock_tc.irp.f +++ b/src/tc_scf/fock_tc.irp.f @@ -122,6 +122,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ] endif END_PROVIDER +! --- !BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)] ! implicit none diff --git a/src/tc_scf/print_fit_param.irp.f b/src/tc_scf/print_fit_param.irp.f new file mode 100644 index 00000000..f8bcfa7f --- /dev/null +++ b/src/tc_scf/print_fit_param.irp.f @@ -0,0 +1,60 @@ +program print_fit_param + + BEGIN_DOC +! TODO : Put the documentation of the program here + END_DOC + + implicit none + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 +! my_n_pt_r_grid = 10 ! small grid for quick debug +! my_n_pt_a_grid = 26 ! small grid for quick debug + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + !call create_guess + !call orthonormalize_mos + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i + + mu_erf = 1.d0 + touch mu_erf + + print *, ' fit for (1 - erf(x))^2' + do i = 1, n_max_fit_slat + print*, expo_gauss_1_erf_x_2(i), coef_gauss_1_erf_x_2(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_x(i), 2.d0 * coef_gauss_j_mu_x(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)]^2' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_x_2(i), 4.d0 * coef_gauss_j_mu_x_2(i) + enddo + + print *, '' + print *, ' fit for [x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)] x [1 - erf(mu * r12)]' + do i = 1, n_max_fit_slat + print *, expo_gauss_j_mu_1_erf(i), 4.d0 * coef_gauss_j_mu_1_erf(i) + enddo + + return +end subroutine main + +! --- + diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f index d32d324d..d53991ed 100644 --- a/src/tc_scf/rotate_tcscf_orbitals.irp.f +++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f @@ -73,7 +73,7 @@ subroutine maximize_overlap() ! --- - call rotate_degen_eigvec(n, m, e, C, W, L, R) + call rotate_degen_eigvec_to_maximize_overlap(n, m, e, C, W, L, R) ! --- @@ -115,7 +115,7 @@ end subroutine maximize_overlap ! 
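The tables printed by print_fit_param above are plain (exponent, coefficient) pairs of Gaussian fits; the new ng_fit_jast keyword (default 6) sets how many Gaussians are used to fit the Jastrow factor. A sketch of how such a fit is typically evaluated, assuming the usual convention f(x) ~ sum_i coef(i) * exp(-expo(i) * x**2); the function name and that convention are assumptions for illustration, not part of the patch:

double precision function sum_of_gaussians(x, ng, expo, coef)
  implicit none
  double precision, intent(in) :: x
  integer,          intent(in) :: ng
  double precision, intent(in) :: expo(ng), coef(ng)
  integer :: i
  sum_of_gaussians = 0.d0
  do i = 1, ng
    sum_of_gaussians = sum_of_gaussians + coef(i) * dexp(-expo(i) * x * x)
  enddo
end function sum_of_gaussians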
--- -subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) +subroutine rotate_degen_eigvec_to_maximize_overlap(n, m, e0, C0, W0, L0, R0) implicit none @@ -124,7 +124,7 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) double precision, intent(inout) :: L0(n,m), R0(n,m) - integer :: i, j, k, kk, mm, id1 + integer :: i, j, k, kk, mm, id1, tot_deg double precision :: ei, ej, de, de_thr integer, allocatable :: deg_num(:) double precision, allocatable :: L(:,:), R(:,:), C(:,:), Lnew(:,:), Rnew(:,:), tmp(:,:) @@ -162,12 +162,19 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) enddo enddo + tot_deg = 0 do i = 1, m if(deg_num(i).gt.1) then print *, ' degen on', i, deg_num(i) + tot_deg = tot_deg + 1 endif enddo + if(tot_deg .eq. 0) then + print *, ' no degen' + return + endif + ! --- do i = 1, m @@ -243,6 +250,122 @@ subroutine rotate_degen_eigvec(n, m, e0, C0, W0, L0, R0) deallocate(S, Snew, T) -end subroutine rotate_degen_eigvec +end subroutine rotate_degen_eigvec_to_maximize_overlap + +! --- + +subroutine fix_right_to_one() + + implicit none + integer :: i, j, m, n, mm, tot_deg + double precision :: accu_d, accu_nd + double precision :: de_thr, ei, ej, de + double precision :: thr_d, thr_nd + integer, allocatable :: deg_num(:) + double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:) + double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:) + + thr_d = 1d-7 + thr_nd = 1d-7 + + n = ao_num + m = mo_num + + allocate(L0(n,m), R0(n,m), W(n,n), e0(m)) + L0 = mo_l_coef + R0 = mo_r_coef + W = ao_overlap + + print*, ' fock matrix diag elements' + do i = 1, m + e0(i) = Fock_matrix_tc_mo_tot(i,i) + print*, e0(i) + enddo + + ! --- + + allocate( deg_num(m) ) + do i = 1, m + deg_num(i) = 1 + enddo + + de_thr = 1d-6 + + do i = 1, m-1 + ei = e0(i) + + ! already considered in degen vectors + if(deg_num(i).eq.0) cycle + + do j = i+1, m + ej = e0(j) + de = dabs(ei - ej) + + if(de .lt. de_thr) then + deg_num(i) = deg_num(i) + 1 + deg_num(j) = 0 + endif + + enddo + enddo + + deallocate(e0) + + tot_deg = 0 + do i = 1, m + if(deg_num(i).gt.1) then + print *, ' degen on', i, deg_num(i) + tot_deg = tot_deg + 1 + endif + enddo + + if(tot_deg .eq. 0) then + print *, ' no degen' + return + endif + + ! --- + + do i = 1, m + mm = deg_num(i) + + if(mm .gt. 1) then + + allocate(L(n,mm), R(n,mm)) + do j = 1, mm + L(1:n,j) = L0(1:n,i+j-1) + R(1:n,j) = R0(1:n,i+j-1) + enddo + + ! --- + + call impose_weighted_orthog_svd(n, mm, W, R) + call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L) + + ! --- + + do j = 1, mm + L0(1:n,i+j-1) = L(1:n,j) + R0(1:n,i+j-1) = R(1:n,j) + enddo + deallocate(L, R) + + endif + enddo + + call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.) + + deallocate(W, deg_num) + + mo_l_coef = L0 + mo_r_coef = R0 + deallocate(L0, R0) + + call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) + call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) + print *, ' orbitals are rotated ' + + return +end subroutine fix_right_to_one ! --- diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f index 51c218fa..42925e41 100644 --- a/src/tc_scf/routines_rotates.irp.f +++ b/src/tc_scf/routines_rotates.irp.f @@ -1,261 +1,335 @@ -subroutine minimize_tc_orb_angles - implicit none - double precision :: thr_deg - logical :: good_angles - integer :: i - good_angles = .False. - thr_deg = thr_degen_tc - call print_energy_and_mos - i = 1 - print*,'Minimizing the angles between the TC orbitals' - do while (.not. 
good_angles) - print*,'iteration = ',i - call routine_save_rotated_mos(thr_deg,good_angles) - thr_deg *= 10.d0 - i+=1 - if(i.gt.100)then - print*,'minimize_tc_orb_angles does not seem to converge ..' - print*,'Something is weird in the tc orbitals ...' - print*,'STOPPING' - endif - enddo - print*,'Converged ANGLES MINIMIZATION !!' - call print_angles_tc - call print_energy_and_mos + +! --- + +subroutine minimize_tc_orb_angles() + + implicit none + logical :: good_angles + integer :: i + double precision :: thr_deg + + good_angles = .False. + thr_deg = thr_degen_tc + + call print_energy_and_mos() + + print *, ' Minimizing the angles between the TC orbitals' + i = 1 + do while (.not. good_angles) + print *, ' iteration = ', i + call routine_save_rotated_mos(thr_deg, good_angles) + thr_deg *= 10.d0 + i += 1 + if(i .gt. 100) then + print *, ' minimize_tc_orb_angles does not seem to converge ..' + print *, ' Something is weird in the tc orbitals ...' + print *, ' STOPPING' + stop + endif + enddo + print *, ' Converged ANGLES MINIMIZATION !!' + + call print_angles_tc() + call print_energy_and_mos() + end -subroutine routine_save_rotated_mos(thr_deg,good_angles) - implicit none - double precision, intent(in) :: thr_deg - logical, intent(out) :: good_angles - good_angles = .False. - integer :: i,j,k,n_degen_list,m,n,n_degen,ilast,ifirst - double precision, allocatable :: mo_r_coef_good(:,:),mo_l_coef_good(:,:) - allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) - double precision, allocatable :: mo_r_coef_new(:,:) - double precision :: norm - print*,'***************************************' - print*,'***************************************' - print*,'THRESHOLD FOR DEGENERACIES ::: ',thr_deg - print*,'***************************************' - print*,'***************************************' - print*,'Starting with the following TC energy gradient :',grad_non_hermit - mo_r_coef_good = mo_r_coef - mo_l_coef_good = mo_l_coef - allocate(mo_r_coef_new(ao_num, mo_num)) - mo_r_coef_new = mo_r_coef - do i = 1, mo_num - norm = 1.d0/dsqrt(overlap_mo_r(i,i)) - do j = 1, ao_num - mo_r_coef_new(j,i) *= norm +! --- + +subroutine routine_save_rotated_mos(thr_deg, good_angles) + + implicit none + + double precision, intent(in) :: thr_deg + logical, intent(out) :: good_angles + + integer :: i, j, k, n_degen_list, m, n, n_degen, ilast, ifirst + double precision :: max_angle, norm + integer, allocatable :: list_degen(:,:) + double precision, allocatable :: new_angles(:) + double precision, allocatable :: mo_r_coef_good(:,:), mo_l_coef_good(:,:) + double precision, allocatable :: mo_r_coef_new(:,:) + double precision, allocatable :: fock_diag(:),s_mat(:,:) + double precision, allocatable :: stmp(:,:), T(:,:), Snew(:,:), smat2(:,:) + double precision, allocatable :: mo_l_coef_tmp(:,:), mo_r_coef_tmp(:,:), mo_l_coef_new(:,:) + + good_angles = .False. 
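+ ! Outline of what follows:
+ !   1) rescale the right MOs to unit norm using the diagonal of overlap_mo_r
+ !   2) group MOs whose diagonal TC Fock elements differ by less than thr_deg
+ !      (give_degen_full_list)
+ !   3) for each degenerate block: orthogonalize the right and the left sets,
+ !      optionally rotate the left set to maximize the left/right overlap
+ !      (maxovl_tc), then impose weighted bi-orthogonality via SVD
+ !   4) store the rotated MOs and report whether the largest left/right angle
+ !      stays below 45 degrees (good_angles)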
+ + allocate(mo_l_coef_good(ao_num, mo_num), mo_r_coef_good(ao_num,mo_num)) + + print *, ' ***************************************' + print *, ' ***************************************' + print *, ' THRESHOLD FOR DEGENERACIES ::: ', thr_deg + print *, ' ***************************************' + print *, ' ***************************************' + print *, ' Starting with the following TC energy gradient :', grad_non_hermit + + mo_r_coef_good = mo_r_coef + mo_l_coef_good = mo_l_coef + + allocate(mo_r_coef_new(ao_num, mo_num)) + mo_r_coef_new = mo_r_coef + do i = 1, mo_num + norm = 1.d0/dsqrt(overlap_mo_r(i,i)) + do j = 1, ao_num + mo_r_coef_new(j,i) *= norm + enddo enddo - enddo - double precision, allocatable :: fock_diag(:),s_mat(:,:) - integer, allocatable :: list_degen(:,:) - allocate(list_degen(mo_num,0:mo_num),s_mat(mo_num,mo_num),fock_diag(mo_num)) - do i = 1, mo_num - fock_diag(i) = Fock_matrix_tc_mo_tot(i,i) - enddo + + allocate(list_degen(mo_num,0:mo_num), s_mat(mo_num,mo_num), fock_diag(mo_num)) + do i = 1, mo_num + fock_diag(i) = Fock_matrix_tc_mo_tot(i,i) + enddo + ! compute the overlap between the left and rescaled right - call build_s_matrix(ao_num,mo_num,mo_r_coef_new,mo_r_coef_new,ao_overlap,s_mat) + call build_s_matrix(ao_num, mo_num, mo_r_coef_new, mo_r_coef_new, ao_overlap, s_mat) ! call give_degen(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - call give_degen_full_list(fock_diag,mo_num,thr_deg,list_degen,n_degen_list) - print*,'fock_matrix_mo' - do i = 1, mo_num - print*,i,fock_diag(i),angle_left_right(i) - enddo + call give_degen_full_list(fock_diag, mo_num, thr_deg, list_degen, n_degen_list) + print *, ' fock_matrix_mo' + do i = 1, mo_num + print *, i, fock_diag(i), angle_left_right(i) + enddo - do i = 1, n_degen_list + do i = 1, n_degen_list ! ifirst = list_degen(1,i) ! ilast = list_degen(2,i) ! n_degen = ilast - ifirst +1 - n_degen = list_degen(i,0) - double precision, allocatable :: stmp(:,:),T(:,:),Snew(:,:),smat2(:,:) - double precision, allocatable :: mo_l_coef_tmp(:,:),mo_r_coef_tmp(:,:),mo_l_coef_new(:,:) - allocate(stmp(n_degen,n_degen),smat2(n_degen,n_degen)) - allocate(mo_r_coef_tmp(ao_num,n_degen),mo_l_coef_tmp(ao_num,n_degen),mo_l_coef_new(ao_num,n_degen)) - allocate(T(n_degen,n_degen),Snew(n_degen,n_degen)) - do j = 1, n_degen - mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) - mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) - enddo - ! Orthogonalization of right functions - print*,'Orthogonalization of RIGHT functions' - print*,'------------------------------------' - call orthog_functions(ao_num,n_degen,mo_r_coef_tmp,ao_overlap) - ! Orthogonalization of left functions - print*,'Orthogonalization of LEFT functions' - print*,'------------------------------------' - call orthog_functions(ao_num,n_degen,mo_l_coef_tmp,ao_overlap) - print*,'Overlap lef-right ' - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_l_coef_tmp,ao_overlap,stmp) - do j = 1, n_degen - write(*,'(100(F8.4,X))')stmp(:,j) + n_degen = list_degen(i,0) + if(n_degen .eq. 1) cycle + + allocate(stmp(n_degen,n_degen), smat2(n_degen,n_degen)) + allocate(mo_r_coef_tmp(ao_num,n_degen), mo_l_coef_tmp(ao_num,n_degen), mo_l_coef_new(ao_num,n_degen)) + allocate(T(n_degen,n_degen), Snew(n_degen,n_degen)) + + do j = 1, n_degen + mo_r_coef_tmp(1:ao_num,j) = mo_r_coef_new(1:ao_num,list_degen(i,j)) + mo_l_coef_tmp(1:ao_num,j) = mo_l_coef(1:ao_num,list_degen(i,j)) + enddo + ! 
Orthogonalization of right functions + print *, ' Orthogonalization of RIGHT functions' + print *, ' ------------------------------------' + call orthog_functions(ao_num, n_degen, mo_r_coef_tmp, ao_overlap) + + ! Orthogonalization of left functions + print *, ' Orthogonalization of LEFT functions' + print *, ' ------------------------------------' + call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap) + + print *, ' Overlap lef-right ' + call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp) + do j = 1, n_degen + write(*,'(100(F8.4,X))') stmp(:,j) + enddo + call build_s_matrix(ao_num, n_degen, mo_l_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp) + + !print*,'LEFT/LEFT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'RIGHT/RIGHT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + + if(maxovl_tc) then + T = 0.d0 + Snew = 0.d0 + call maxovl(n_degen, n_degen, stmp, T, Snew) + !print*,'overlap after' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')Snew(:,j) + !enddo + call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0 & + , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) & + , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) ) + call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'Overlap test' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + else + mo_l_coef_new = mo_l_coef_tmp + endif + + call impose_weighted_biorthog_svd(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp) + + !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'LAST OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, n_degen, mo_l_coef_new, mo_l_coef_new, ao_overlap, stmp) + !print*,'LEFT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_r_coef_tmp, ao_overlap, stmp) + !print*,'RIGHT OVERLAP ' + !do j = 1, n_degen + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + do j = 1, n_degen +!!! mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j) +!!! mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j) + mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j) + mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j) + enddo + + deallocate(stmp, smat2) + deallocate(mo_r_coef_tmp, mo_l_coef_tmp, mo_l_coef_new) + deallocate(T, Snew) enddo - call build_s_matrix(ao_num,n_degen,mo_l_coef_tmp,mo_l_coef_tmp,ao_overlap,stmp) + + !allocate(stmp(mo_num, mo_num)) + !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_r_coef_good, ao_overlap, stmp) + !print*,'LEFT/RIGHT OVERLAP ' + !do j = 1, mo_num + ! write(*,'(100(F16.10,X))')stmp(:,j) + !enddo + !call build_s_matrix(ao_num, mo_num, mo_l_coef_good, mo_l_coef_good, ao_overlap, stmp) !print*,'LEFT/LEFT OVERLAP ' - !do j = 1, n_degen + !do j = 1, mo_num ! write(*,'(100(F16.10,X))')stmp(:,j) !enddo - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_r_coef_tmp,ao_overlap,stmp) + !call build_s_matrix(ao_num, mo_num, mo_r_coef_good, mo_r_coef_good, ao_overlap, stmp) !print*,'RIGHT/RIGHT OVERLAP ' - !do j = 1, n_degen + !do j = 1, mo_num ! 
write(*,'(100(F16.10,X))')stmp(:,j) !enddo - if(maxovl_tc)then - T = 0.d0 - Snew = 0.d0 - call maxovl(n_degen, n_degen, stmp, T, Snew) - !print*,'overlap after' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')Snew(:,j) - !enddo - call dgemm( 'N', 'N', ao_num, n_degen, n_degen, 1.d0 & - , mo_l_coef_tmp, size(mo_l_coef_tmp, 1), T(1,1), size(T, 1) & - , 0.d0, mo_l_coef_new, size(mo_l_coef_new, 1) ) - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'Overlap test' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - else - mo_l_coef_new = mo_l_coef_tmp - endif - call impose_biorthog_svd_overlap(ao_num, n_degen, ao_overlap, mo_l_coef_new, mo_r_coef_tmp) - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'LAST OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,n_degen,mo_l_coef_new,mo_l_coef_new,ao_overlap,stmp) - !print*,'LEFT OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,n_degen,mo_r_coef_tmp,mo_r_coef_tmp,ao_overlap,stmp) - !print*,'RIGHT OVERLAP ' - !do j = 1, n_degen - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - do j = 1, n_degen -! mo_l_coef_good(1:ao_num,j+ifirst-1) = mo_l_coef_new(1:ao_num,j) -! mo_r_coef_good(1:ao_num,j+ifirst-1) = mo_r_coef_tmp(1:ao_num,j) - mo_l_coef_good(1:ao_num,list_degen(i,j)) = mo_l_coef_new(1:ao_num,j) - mo_r_coef_good(1:ao_num,list_degen(i,j)) = mo_r_coef_tmp(1:ao_num,j) - enddo - deallocate(stmp,smat2) - deallocate(mo_r_coef_tmp,mo_l_coef_tmp,mo_l_coef_new) - deallocate(T,Snew) - enddo - allocate(stmp(mo_num, mo_num)) - call build_s_matrix(ao_num,mo_num,mo_l_coef_good,mo_r_coef_good,ao_overlap,stmp) - !print*,'LEFT/RIGHT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,mo_num,mo_l_coef_good,mo_l_coef_good,ao_overlap,stmp) - !print*,'LEFT/LEFT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo - call build_s_matrix(ao_num,mo_num,mo_r_coef_good,mo_r_coef_good,ao_overlap,stmp) - !print*,'RIGHT/RIGHT OVERLAP ' - !do j = 1, mo_num - ! write(*,'(100(F16.10,X))')stmp(:,j) - !enddo mo_r_coef = mo_r_coef_good mo_l_coef = mo_l_coef_good call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef) call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef) TOUCH mo_l_coef mo_r_coef - double precision, allocatable :: new_angles(:) + allocate(new_angles(mo_num)) new_angles(1:mo_num) = dabs(angle_left_right(1:mo_num)) - double precision :: max_angle max_angle = maxval(new_angles) good_angles = max_angle.lt.45.d0 - print*,'max_angle = ',max_angle + print *, ' max_angle = ', max_angle end -subroutine build_s_matrix(m,n,C1,C2,overlap,smat) - implicit none - integer, intent(in) :: m,n - double precision, intent(in) :: C1(m,n),C2(m,n),overlap(m,m) - double precision, intent(out):: smat(n,n) - integer :: i,j,k,l - smat = 0.D0 - do i = 1, n - do j = 1, n - do k = 1, m - do l = 1, m - smat(i,j) += C1(k,i) * overlap(l,k) * C2(l,j) - enddo - enddo - enddo - enddo +! --- + +subroutine build_s_matrix(m, n, C1, C2, overlap, smat) + + implicit none + integer, intent(in) :: m, n + double precision, intent(in) :: C1(m,n), C2(m,n), overlap(m,m) + double precision, intent(out) :: smat(n,n) + integer :: i, j, k, l + double precision, allocatable :: S_tmp(:,:) + + smat = 0.d0 + + !do i = 1, n + ! do j = 1, n + ! do k = 1, m + ! do l = 1, m + ! smat(i,j) += C1(k,i) * overlap(l,k) * C2(l,j) + ! enddo + ! 
enddo + ! enddo + !enddo + + ! C1.T x overlap + allocate(S_tmp(n,m)) + call dgemm( 'T', 'N', n, m, m, 1.d0 & + , C1, size(C1, 1), overlap, size(overlap, 1) & + , 0.d0, S_tmp, size(S_tmp, 1) ) + ! C1.T x overlap x C2 + call dgemm( 'N', 'N', n, n, m, 1.d0 & + , S_tmp, size(S_tmp, 1), C2(1,1), size(C2, 1) & + , 0.d0, smat, size(smat, 1) ) + deallocate(S_tmp) + end -subroutine orthog_functions(m,n,coef,overlap) - implicit none - integer, intent(in) :: m,n - double precision, intent(in) :: overlap(m,m) - double precision, intent(inout) :: coef(m,n) - double precision, allocatable :: stmp(:,:) - integer :: j - allocate(stmp(n,n)) - call build_s_matrix(m,n,coef,coef,overlap,stmp) +! --- + +subroutine orthog_functions(m, n, coef, overlap) + + implicit none + + integer, intent(in) :: m, n + double precision, intent(in) :: overlap(m,m) + double precision, intent(inout) :: coef(m,n) + double precision, allocatable :: stmp(:,:) + integer :: j + + allocate(stmp(n,n)) + call build_s_matrix(m, n, coef, coef, overlap, stmp) ! print*,'overlap before' ! do j = 1, n ! write(*,'(100(F16.10,X))')stmp(:,j) ! enddo - call impose_orthog_svd_overlap(m, n, coef,overlap) - call build_s_matrix(m,n,coef,coef,overlap,stmp) + call impose_orthog_svd_overlap(m, n, coef, overlap) + call build_s_matrix(m, n, coef, coef, overlap, stmp) do j = 1, n - coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) + coef(1,:m) *= 1.d0/dsqrt(stmp(j,j)) enddo - call build_s_matrix(m,n,coef,coef,overlap,stmp) + call build_s_matrix(m, n, coef, coef, overlap, stmp) + !print*,'overlap after' !do j = 1, n ! write(*,'(100(F16.10,X))')stmp(:,j) !enddo + + deallocate(stmp) + end -subroutine print_angles_tc - implicit none - integer :: i,j - double precision :: left,right - print*,'product of norms, angle between vectors' - do i = 1, mo_num - left = overlap_mo_l(i,i) - right = overlap_mo_r(i,i) -! print*,Fock_matrix_tc_mo_tot(i,i),left*right,angle_left_right(i) - print*,left*right,angle_left_right(i) - enddo -end +! --- -subroutine print_energy_and_mos - implicit none - integer :: i - print*,'' - print*,'TC energy = ', TC_HF_energy - print*,'TC SCF energy gradient = ',grad_non_hermit - print*,'Max angle Left/right = ',max_angle_left_right - if(max_angle_left_right.lt.45.d0)then - print*,'Maximum angle BELOW 45 degrees, everthing is OK !' - else if(max_angle_left_right.gt.45.d0.and.max_angle_left_right.lt.75.d0)then - print*,'Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' - else if(max_angle_left_right.gt.75.d0)then - print*,'Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' - endif - print*,'Diag Fock elem, product of left/right norm, angle left/right ' +subroutine print_angles_tc() + + implicit none + integer :: i, j + double precision :: left, right + + print *, ' product of norms, angle between vectors' do i = 1, mo_num - write(*,'(I3,X,100(F16.10,X))')i,Fock_matrix_tc_mo_tot(i,i),overlap_mo_l(i,i)*overlap_mo_r(i,i),angle_left_right(i) + left = overlap_mo_l(i,i) + right = overlap_mo_r(i,i) +! print*,Fock_matrix_tc_mo_tot(i,i),left*right,angle_left_right(i) + print *, left*right, angle_left_right(i) enddo + end +! --- + +subroutine print_energy_and_mos() + + implicit none + integer :: i + + print *, ' ' + print *, ' TC energy = ', TC_HF_energy + print *, ' TC SCF energy gradient = ', grad_non_hermit + print *, ' Max angle Left/right = ', max_angle_left_right + + if(max_angle_left_right .lt. 45.d0) then + print *, ' Maximum angle BELOW 45 degrees, everthing is OK !' 
+ else if(max_angle_left_right .gt. 45.d0 .and. max_angle_left_right .lt. 75.d0) then + print *, ' Maximum angle between 45 and 75 degrees, this is not the best for TC-CI calculations ...' + else if(max_angle_left_right .gt. 75.d0) then + print *, ' Maximum angle between ABOVE 75 degrees, YOU WILL CERTAINLY FIND TROUBLES IN TC-CI calculations ...' + endif + + print *, ' Diag Fock elem, product of left/right norm, angle left/right ' + do i = 1, mo_num + write(*, '(I3,X,100(F16.10,X))') i, Fock_matrix_tc_mo_tot(i,i), overlap_mo_l(i,i)*overlap_mo_r(i,i), angle_left_right(i) + enddo + +end + +! --- + subroutine sort_by_tc_fock implicit none integer, allocatable :: iorder(:) @@ -276,3 +350,4 @@ subroutine sort_by_tc_fock touch mo_l_coef mo_r_coef end + diff --git a/src/tc_scf/tc_petermann_factor.irp.f b/src/tc_scf/tc_petermann_factor.irp.f new file mode 100644 index 00000000..d3722098 --- /dev/null +++ b/src/tc_scf/tc_petermann_factor.irp.f @@ -0,0 +1,78 @@ + +! --- + +program tc_petermann_factor + + BEGIN_DOC + ! TODO : Put the documentation of the program here + END_DOC + + implicit none + + my_grid_becke = .True. + my_n_pt_r_grid = 30 + my_n_pt_a_grid = 50 +! my_n_pt_r_grid = 10 ! small grid for quick debug +! my_n_pt_a_grid = 26 ! small grid for quick debug + touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid + + call main() + +end + +! --- + +subroutine main() + + implicit none + integer :: i, j + double precision :: Pf_diag_av + double precision, allocatable :: Sl(:,:), Sr(:,:), Pf(:,:) + + allocate(Sl(mo_num,mo_num), Sr(mo_num,mo_num), Pf(mo_num,mo_num)) + + call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + , mo_l_coef, size(mo_l_coef, 1), mo_l_coef, size(mo_l_coef, 1) & + , 0.d0, Sl, size(Sl, 1) ) + + print *, '' + print *, ' left-orthog matrix:' + do i = 1, mo_num + write(*,'(100(F8.4,X))') Sl(:,i) + enddo + + call dgemm( "T", "N", mo_num, mo_num, ao_num, 1.d0 & + , mo_r_coef, size(mo_r_coef, 1), mo_r_coef, size(mo_r_coef, 1) & + , 0.d0, Sr, size(Sr, 1) ) + + print *, '' + print *, ' right-orthog matrix:' + do i = 1, mo_num + write(*,'(100(F8.4,X))') Sr(:,i) + enddo + + print *, '' + print *, ' Petermann matrix:' + do i = 1, mo_num + do j = 1, mo_num + Pf(j,i) = Sl(j,i) * Sr(j,i) + enddo + write(*,'(100(F8.4,X))') Pf(:,i) + enddo + + Pf_diag_av = 0.d0 + do i = 1, mo_num + Pf_diag_av = Pf_diag_av + Pf(i,i) + enddo + Pf_diag_av = Pf_diag_av / dble(mo_num) + + print *, '' + print *, ' mean of the diagonal Petermann factor = ', Pf_diag_av + + deallocate(Sl, Sr, Pf) + + return +end subroutine + +! --- + diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f index 4a875b59..48cbbdc0 100644 --- a/src/tc_scf/tc_scf.irp.f +++ b/src/tc_scf/tc_scf.irp.f @@ -19,8 +19,8 @@ program tc_scf !call orthonormalize_mos call routine_scf() - call minimize_tc_orb_angles - call print_energy_and_mos + call minimize_tc_orb_angles() + call print_energy_and_mos() end diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f new file mode 100644 index 00000000..87488fba --- /dev/null +++ b/src/tools/print_he_energy.irp.f @@ -0,0 +1,365 @@ + +! --- + +program print_he_energy + + implicit none + + call print_overlap() + + call print_energy1() + call print_energy2() + +end + +! 
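For two electrons in a single orbital phi_1, the quantity assembled by print_energy1 below reduces to E = (2*h11*S11 + (11|11)) / S11**2, i.e. 2*h11 + J11 once the orbital is normalized. A one-line sketch of that final assembly, assuming the contracted one-electron, overlap and two-electron quantities have already been computed:

double precision function he_energy_one_orbital(h11, s11, g1111)
  implicit none
  double precision, intent(in) :: h11    ! <phi_1| h |phi_1>
  double precision, intent(in) :: s11    ! <phi_1|phi_1>
  double precision, intent(in) :: g1111  ! (phi_1 phi_1 | phi_1 phi_1)
  he_energy_one_orbital = (2.d0 * h11 * s11 + g1111) / (s11 * s11)
end function he_energy_one_orbital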
--- + +subroutine print_overlap() + + implicit none + integer :: i, j, k, l + double precision :: S_ij + + print *, ' ao_overlap:' + do i = 1, ao_num + do j = 1, ao_num + print *, j, i, ao_overlap(j,i) + enddo + enddo + + + print *, ' mo_overlap:' + do i = 1, mo_num + do j = 1, mo_num + + S_ij = 0.d0 + do k = 1, ao_num + do l = 1, ao_num + S_ij += mo_coef(k,i) * ao_overlap(k,l) * mo_coef(l,j) + enddo + enddo + + print *, i, j, S_ij + enddo + enddo + +end subroutine print_overlap + +! --- + +subroutine print_energy1() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! < phi_1 phi_1 | h1 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_1 | h2 | phi_1 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ( phi_1 phi_1 | phi_1 phi_1 ) + e += mo_coef(i,1) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,1) + + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_1 | phi_1 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + print *, ' energy = ', e + +end subroutine print_energy1 + +! --- + +subroutine print_energy2() + + implicit none + integer :: i, j, k, l + double precision :: e, n, e_tmp, n_tmp + double precision, external :: ao_two_e_integral + + e = 0.d0 + n = 0.d0 + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! < phi_1 phi_2 | h1 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h1 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h1 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! 
< phi_2 phi_1 | h1 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h2 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,2) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | h2 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + e_tmp += mo_coef(i,2) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h2 | phi_1 phi_2 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,2) + enddo + enddo + + e -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | h2 | phi_2 phi_1 > + + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + e_tmp += mo_coef(i,1) * ao_one_e_integrals(i,j) * mo_coef(j,1) + enddo + enddo + + e += e_tmp * n_tmp + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + do i = 1, ao_num + do j = 1, ao_num + do k = 1, ao_num + do l = 1, ao_num + + ! ( phi_1 phi_1 | phi_2 phi_2 ) + e += mo_coef(i,1) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,2) * mo_coef(l,2) + + ! ( phi_1 phi_2 | phi_2 phi_1 ) + e -= mo_coef(i,1) * mo_coef(j,2) * ao_two_e_integral(i,j,k,l) * mo_coef(k,2) * mo_coef(l,1) + + ! ( phi_2 phi_1 | phi_1 phi_2 ) + e -= mo_coef(i,2) * mo_coef(j,1) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,2) + + ! ( phi_2 phi_2 | phi_1 phi_1 ) + e += mo_coef(i,2) * mo_coef(j,2) * ao_two_e_integral(i,j,k,l) * mo_coef(k,1) * mo_coef(l,1) + + enddo + enddo + enddo + enddo + + ! --- + + ! --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + ! --- + + ! < phi_1 phi_2 | phi_1 phi_2 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! < phi_1 phi_2 | phi_2 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | phi_1 phi_2 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,1) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,2) + enddo + enddo + + n -= e_tmp * n_tmp + + ! --- + + ! < phi_2 phi_1 | phi_2 phi_1 > + e_tmp = 0.d0 + n_tmp = 0.d0 + do i = 1, ao_num + do j = 1, ao_num + e_tmp += mo_coef(i,2) * ao_overlap(i,j) * mo_coef(j,2) + n_tmp += mo_coef(i,1) * ao_overlap(i,j) * mo_coef(j,1) + enddo + enddo + + n += e_tmp * n_tmp + + ! --- + + ! 
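Between the one-electron accumulations above and the two-electron and normalization loops that follow, the quantity print_energy2 appears to assemble (assuming real, possibly non-orthonormal orbitals) is the expectation value over the antisymmetrized spatial product of the two occupied orbitals,

$E = \frac{\langle \Phi | \hat{h}(1) + \hat{h}(2) + r_{12}^{-1} | \Phi \rangle}{\langle \Phi | \Phi \rangle}, \qquad \Phi(r_1,r_2) = \phi_1(r_1)\,\phi_2(r_2) - \phi_2(r_1)\,\phi_1(r_2),$

with the denominator accumulated in n as

$\langle \Phi | \Phi \rangle = 2\,\left( S_{11} S_{22} - S_{12} S_{21} \right), \qquad S_{ij} = \langle \phi_i | \phi_j \rangle .$
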
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- + + e = e / n + print *, ' energy = ', e + +end subroutine print_energy2 + +! --- diff --git a/src/utils/block_diag_degen.irp.f b/src/utils/block_diag_degen.irp.f index f6906e23..188bfa58 100644 --- a/src/utils/block_diag_degen.irp.f +++ b/src/utils/block_diag_degen.irp.f @@ -1,177 +1,218 @@ -subroutine diag_mat_per_fock_degen(fock_diag,mat_ref,n,thr_deg,leigvec,reigvec,eigval) - implicit none - integer, intent(in) :: n - double precision, intent(in) :: fock_diag(n),mat_ref(n,n),thr_deg - double precision, intent(out):: leigvec(n,n),reigvec(n,n),eigval(n) - BEGIN_DOC -! subroutine that diagonalizes a matrix mat_ref BY BLOCK -! -! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" -! -! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together -! -! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together -! -! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together -! -! etc... the advantage is to guarentee no spurious mixing because of numerical problems. - END_DOC - double precision, allocatable :: leigvec_unsrtd(:,:),reigvec_unsrtd(:,:),eigval_unsrtd(:) - integer, allocatable :: list_degen(:,:),list_same_degen(:) - integer, allocatable :: iorder(:),list_degen_sorted(:) - integer :: n_degen_list,n_degen,size_mat,i,j,k,icount,m,index_degen - integer :: ii,jj,i_good,j_good,n_real - double precision, allocatable :: mat_tmp(:,:),eigval_tmp(:),leigvec_tmp(:,:),reigvec_tmp(:,:) +subroutine diag_mat_per_fock_degen(fock_diag, mat_ref, n, thr_d, thr_nd, thr_deg, leigvec, reigvec, eigval) - allocate(leigvec_unsrtd(n,n),reigvec_unsrtd(n,n),eigval_unsrtd(n)) - leigvec_unsrtd = 0.d0 - reigvec_unsrtd = 0.d0 - eigval_unsrtd = 0.d0 - allocate(list_degen(n,0:n)) + BEGIN_DOC + ! + ! subroutine that diagonalizes a matrix mat_ref BY BLOCK + ! + ! the blocks are defined by the elements having the SAME DEGENERACIES in the entries "fock_diag" + ! + ! examples : all elements having degeneracy 1 in fock_diag (i.e. not being degenerated) will be treated together + ! + ! : all elements having degeneracy 2 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! : all elements having degeneracy 3 in fock_diag (i.e. two elements are equal) will be treated together + ! + ! etc... the advantage is to guarentee no spurious mixing because of numerical problems. + ! + END_DOC - ! obtain degeneracies - call give_degen_full_list(fock_diag,n,thr_deg,list_degen,n_degen_list) - allocate(iorder(n_degen_list),list_degen_sorted(n_degen_list)) - do i =1, n_degen_list - n_degen = list_degen(i,0) - list_degen_sorted(i) = n_degen - iorder(i)=i - enddo - ! sort by number of degeneracies - call isort(list_degen_sorted,iorder,n_degen_list) - integer :: icount_eigval - logical, allocatable :: is_ok(:) - allocate(is_ok(n_degen_list)) - is_ok = .True. - icount_eigval = 0 - ! loop over degeneracies - do i = 1, n_degen_list - if(.not.is_ok(i))cycle - is_ok(i) = .False. - n_degen = list_degen_sorted(i) - print*,'diagonalizing for n_degen = ',n_degen - k = 1 - ! group all the entries having the same degeneracies -! do while (list_degen_sorted(i+k)==n_degen) - do m = i+1,n_degen_list - if(list_degen_sorted(m)==n_degen)then - is_ok(i+k)=.False. 
- k += 1 - endif + implicit none + integer, intent(in) :: n + double precision, intent(in) :: fock_diag(n), mat_ref(n,n), thr_d, thr_nd, thr_deg + double precision, intent(out) :: leigvec(n,n), reigvec(n,n), eigval(n) + + integer :: n_degen_list, n_degen,size_mat, i, j, k, icount, m, index_degen + integer :: ii, jj, i_good, j_good, n_real + integer :: icount_eigval + logical, allocatable :: is_ok(:) + integer, allocatable :: list_degen(:,:), list_same_degen(:) + integer, allocatable :: iorder(:), list_degen_sorted(:) + double precision, allocatable :: leigvec_unsrtd(:,:), reigvec_unsrtd(:,:), eigval_unsrtd(:) + double precision, allocatable :: mat_tmp(:,:), eigval_tmp(:), leigvec_tmp(:,:), reigvec_tmp(:,:) + + allocate(leigvec_unsrtd(n,n), reigvec_unsrtd(n,n), eigval_unsrtd(n)) + leigvec_unsrtd = 0.d0 + reigvec_unsrtd = 0.d0 + eigval_unsrtd = 0.d0 + + ! obtain degeneracies + allocate(list_degen(n,0:n)) + call give_degen_full_list(fock_diag, n, thr_deg, list_degen, n_degen_list) + + allocate(iorder(n_degen_list), list_degen_sorted(n_degen_list)) + do i = 1, n_degen_list + n_degen = list_degen(i,0) + list_degen_sorted(i) = n_degen + iorder(i) = i enddo - print*,'number of identical degeneracies = ',k - size_mat = k*n_degen - print*,'size_mat = ',size_mat - allocate(mat_tmp(size_mat,size_mat),list_same_degen(size_mat)) - allocate(eigval_tmp(size_mat),leigvec_tmp(size_mat,size_mat),reigvec_tmp(size_mat,size_mat)) - ! group all the elements sharing the same degeneracy - icount = 0 - do j = 1, k ! jth set of degeneracy - index_degen = iorder(i+j-1) - do m = 1, n_degen - icount += 1 - list_same_degen(icount) = list_degen(index_degen,m) - enddo + + ! sort by number of degeneracies + call isort(list_degen_sorted, iorder, n_degen_list) + + allocate(is_ok(n_degen_list)) + is_ok = .True. + icount_eigval = 0 + + ! loop over degeneracies + do i = 1, n_degen_list + if(.not.is_ok(i)) cycle + + is_ok(i) = .False. + n_degen = list_degen_sorted(i) + + print *, ' diagonalizing for n_degen = ', n_degen + + k = 1 + + ! group all the entries having the same degeneracies +!! do while (list_degen_sorted(i+k)==n_degen) + do m = i+1, n_degen_list + if(list_degen_sorted(m)==n_degen) then + is_ok(i+k) = .False. + k += 1 + endif + enddo + + print *, ' number of identical degeneracies = ', k + size_mat = k*n_degen + print *, ' size_mat = ', size_mat + allocate(mat_tmp(size_mat,size_mat), list_same_degen(size_mat)) + allocate(eigval_tmp(size_mat), leigvec_tmp(size_mat,size_mat), reigvec_tmp(size_mat,size_mat)) + ! group all the elements sharing the same degeneracy + icount = 0 + do j = 1, k ! jth set of degeneracy + index_degen = iorder(i+j-1) + do m = 1, n_degen + icount += 1 + list_same_degen(icount) = list_degen(index_degen,m) + enddo + enddo + + print *, ' list of elements ' + do icount = 1, size_mat + print *, icount, list_same_degen(icount) + enddo + + ! you copy subset of matrix elements having all the same degeneracy in mat_tmp + do ii = 1, size_mat + i_good = list_same_degen(ii) + do jj = 1, size_mat + j_good = list_same_degen(jj) + mat_tmp(jj,ii) = mat_ref(j_good,i_good) + enddo + enddo + + call non_hrmt_bieig( size_mat, mat_tmp, thr_d, thr_nd & + , leigvec_tmp, reigvec_tmp & + , n_real, eigval_tmp ) + + do ii = 1, size_mat + icount_eigval += 1 + eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues + do jj = 1, size_mat ! 
copy the eigenvectors + j_good = list_same_degen(jj) + leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) + reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) + enddo + enddo + + deallocate(mat_tmp, list_same_degen) + deallocate(eigval_tmp, leigvec_tmp, reigvec_tmp) enddo - print*,'list of elements ' - do icount = 1, size_mat - print*,icount,list_same_degen(icount) - enddo - ! you copy subset of matrix elements having all the same degeneracy in mat_tmp - do ii = 1, size_mat - i_good = list_same_degen(ii) - do jj = 1, size_mat - j_good = list_same_degen(jj) - mat_tmp(jj,ii) = mat_ref(j_good,i_good) - enddo - enddo - call non_hrmt_bieig( size_mat, mat_tmp& - , leigvec_tmp, reigvec_tmp& - , n_real, eigval_tmp) - do ii = 1, size_mat - icount_eigval += 1 - eigval_unsrtd(icount_eigval) = eigval_tmp(ii) ! copy eigenvalues - do jj = 1, size_mat ! copy the eigenvectors - j_good = list_same_degen(jj) - leigvec_unsrtd(j_good,icount_eigval) = leigvec_tmp(jj,ii) - reigvec_unsrtd(j_good,icount_eigval) = reigvec_tmp(jj,ii) - enddo - enddo - deallocate(mat_tmp,list_same_degen) - deallocate(eigval_tmp,leigvec_tmp,reigvec_tmp) - enddo - if(icount_eigval.ne.n)then - print*,'pb !! (icount_eigval.ne.n)' - print*,'icount_eigval,n',icount_eigval,n - stop - endif + + if(icount_eigval .ne. n) then + print *, ' pb !! (icount_eigval.ne.n)' + print *, ' icount_eigval,n', icount_eigval, n + stop + endif - deallocate(iorder) - allocate(iorder(n)) - do i = 1, n - iorder(i) = i - enddo - call dsort(eigval_unsrtd,iorder,n) - do i = 1, n - print*,'sorted eigenvalues ' - i_good = iorder(i) - eigval(i) = eigval_unsrtd(i) - print*,'i,eigval(i) = ',i,eigval(i) - do j = 1, n - leigvec(j,i) = leigvec_unsrtd(j,i_good) - reigvec(j,i) = reigvec_unsrtd(j,i_good) + deallocate(iorder) + allocate(iorder(n)) + do i = 1, n + iorder(i) = i enddo - enddo + call dsort(eigval_unsrtd, iorder, n) + + do i = 1, n + print*,'sorted eigenvalues ' + i_good = iorder(i) + eigval(i) = eigval_unsrtd(i) + print*,'i,eigval(i) = ',i,eigval(i) + do j = 1, n + leigvec(j,i) = leigvec_unsrtd(j,i_good) + reigvec(j,i) = reigvec_unsrtd(j,i_good) + enddo + enddo + + deallocate(leigvec_unsrtd, reigvec_unsrtd, eigval_unsrtd) + deallocate(list_degen) + deallocate(iorder, list_degen_sorted) + deallocate(is_ok) + end -subroutine give_degen_full_list(A,n,thr,list_degen,n_degen_list) - implicit none - BEGIN_DOC - ! you enter with an array A(n) and spits out all the elements degenerated up to thr - ! - ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL - ! - ! list_degen(i,0) = number of degenerate entries - ! - ! list_degen(i,1) = index of the first degenerate entry - ! - ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries - ! - ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element - END_DOC - double precision,intent(in) :: A(n) - double precision,intent(in) :: thr - integer, intent(in) :: n - integer, intent(out) :: list_degen(n,0:n),n_degen_list - logical, allocatable :: is_ok(:) - allocate(is_ok(n)) - integer :: i,j,icount,icheck - n_degen_list = 0 - is_ok = .True. - do i = 1, n - if(.not.is_ok(i))cycle - n_degen_list +=1 - is_ok(i) = .False. - list_degen(n_degen_list,1) = i - icount = 1 - do j = i+1, n - if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j))then - is_ok(j) = .False. - icount += 1 - list_degen(n_degen_list,icount) = j - endif +! --- + +subroutine give_degen_full_list(A, n, thr, list_degen, n_degen_list) + + BEGIN_DOC + ! 
you enter with an array A(n) and spits out all the elements degenerated up to thr + ! + ! the elements of A(n) DON'T HAVE TO BE SORTED IN THE ENTRANCE: TOTALLY GENERAL + ! + ! list_degen(i,0) = number of degenerate entries + ! + ! list_degen(i,1) = index of the first degenerate entry + ! + ! list_degen(i,2:list_degen(i,0)) = list of all other dengenerate entries + ! + ! if list_degen(i,0) == 1 it means that there is no degeneracy for that element + END_DOC + + implicit none + + double precision, intent(in) :: A(n) + double precision, intent(in) :: thr + integer, intent(in) :: n + integer, intent(out) :: list_degen(n,0:n), n_degen_list + integer :: i, j, icount, icheck + logical, allocatable :: is_ok(:) + + + allocate(is_ok(n)) + n_degen_list = 0 + is_ok = .True. + do i = 1, n + if(.not.is_ok(i)) cycle + n_degen_list +=1 + is_ok(i) = .False. + list_degen(n_degen_list,1) = i + icount = 1 + do j = i+1, n + if(dabs(A(i)-A(j)).lt.thr.and.is_ok(j)) then + is_ok(j) = .False. + icount += 1 + list_degen(n_degen_list,icount) = j + endif + enddo + + list_degen(n_degen_list,0) = icount enddo - list_degen(n_degen_list,0) = icount - enddo - icheck = 0 - do i = 1, n_degen_list - icheck += list_degen(i,0) - enddo - if(icheck.ne.n)then - print*,'pb ! :: icheck.ne.n' - print*,icheck,n - stop - endif + + icheck = 0 + do i = 1, n_degen_list + icheck += list_degen(i,0) + enddo + + if(icheck.ne.n)then + print *, ' pb ! :: icheck.ne.n' + print *, icheck, n + stop + endif + end + +! --- + diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f index f68465c7..f593cefb 100644 --- a/src/utils/integration.irp.f +++ b/src/utils/integration.irp.f @@ -47,7 +47,9 @@ subroutine give_explicit_poly_and_gaussian_x(P_new,P_center,p,fact_k,iorder,alph end +! TODO remove dim subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,dim) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -60,6 +62,7 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha, ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC + implicit none include 'constants.include.F' integer, intent(in) :: dim @@ -129,7 +132,8 @@ end !--- -subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,n_points) +subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, LD_A, B_center, n_points) + BEGIN_DOC ! Transforms the product of ! (x-x_A)^a(1) (x-x_B)^b(1) (x-x_A)^a(2) (y-y_B)^b(2) (z-z_A)^a(3) (z-z_B)^b(3) exp(-(r-A)^2 alpha) exp(-(r-B)^2 beta) @@ -142,24 +146,26 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ! returns a "s" function centered in zero ! with an inifinite exponent and a zero polynom coef END_DOC - implicit none + include 'constants.include.F' - integer, intent(in) :: n_points, ldp - integer, intent(in) :: a(3),b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) - double precision, intent(in) :: alpha, beta ! exponents - double precision, intent(in) :: A_center(n_points,3) ! A center - double precision, intent(in) :: B_center (3) ! B center - double precision, intent(out) :: P_center(n_points,3) ! new center - double precision, intent(out) :: p ! new exponent - double precision, intent(out) :: fact_k(n_points) ! 
constant factor - double precision, intent(out) :: P_new(n_points,0:ldp,3)! polynomial - integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials - double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) + implicit none + integer, intent(in) :: n_points, ldp, LD_A + integer, intent(in) :: a(3), b(3) ! powers : (x-xa)**a_x = (x-A(1))**a(1) + double precision, intent(in) :: alpha, beta ! exponents + double precision, intent(in) :: A_center(LD_A,3) ! A center + double precision, intent(in) :: B_center(3) ! B center + integer, intent(out) :: iorder(3) ! i_order(i) = order of the polynomials + double precision, intent(out) :: P_center(n_points,3) ! new center + double precision, intent(out) :: p ! new exponent + double precision, intent(out) :: fact_k(n_points) ! constant factor + double precision, intent(out) :: P_new(n_points,0:ldp,3) ! polynomial - integer :: n_new,i,j, ipoint, lda, ldb, xyz + integer :: n_new, i, j, ipoint, lda, ldb, xyz + double precision, allocatable :: P_a(:,:,:), P_b(:,:,:) - call gaussian_product_v(alpha,A_center,beta,B_center,fact_k,p,P_center,n_points) + + call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points) if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then ! b == (0,0,0) @@ -167,13 +173,13 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = 0 allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3)) - call recentered_poly2_v0(P_a,lda,A_center,P_center,a,P_b,B_center,P_center,n_points) + call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points) iorder(1:3) = a(1:3) - do ipoint=1,n_points - do xyz=1,3 + do ipoint = 1, n_points + do xyz = 1, 3 P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo @@ -187,31 +193,31 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_k,iorde ldb = maxval(b) allocate(P_a(n_points,0:lda,3), P_b(n_points,0:ldb,3)) - call recentered_poly2_v(P_a,lda,A_center,P_center,a,P_b,ldb,B_center,P_center,b,n_points) + call recentered_poly2_v(P_a, lda, A_center, LD_A, P_center, a, P_b, ldb, B_center, P_center, b, n_points) iorder(1:3) = a(1:3) + b(1:3) - do xyz=1,3 + do xyz = 1, 3 if (b(xyz) == 0) then - do ipoint=1,n_points + do ipoint = 1, n_points P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz) - do i=1,a(xyz) + do i = 1, a(xyz) P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz) enddo enddo else - do i=0,iorder(xyz) - do ipoint=1,n_points + do i = 0, iorder(xyz) + do ipoint = 1, n_points P_new(ipoint,i,xyz) = 0.d0 enddo enddo - call multiply_poly_v(P_a(1,0,xyz), a(xyz),P_b(1,0,xyz),b(xyz),P_new(1,0,xyz),ldp,n_points) + call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points) endif enddo -end +end subroutine give_explicit_poly_and_gaussian_v -!- +! --- subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder,alpha,beta,gama,a,b,A_center,B_center,Nucl_center,dim) BEGIN_DOC @@ -273,15 +279,16 @@ subroutine give_explicit_poly_and_gaussian_double(P_new,P_center,p,fact_k,iorder end +! --- +subroutine gaussian_product(a, xa, b, xb, k, p, xp) -subroutine gaussian_product(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision, intent(in) :: a,b ! 
Exponents double precision, intent(in) :: xa(3),xb(3) ! Centers double precision, intent(out) :: p ! New exponent @@ -312,33 +319,39 @@ subroutine gaussian_product(a,xa,b,xb,k,p,xp) xp(1) = (a*xa(1)+b*xb(1))*p_inv xp(2) = (a*xa(2)+b*xb(2))*p_inv xp(3) = (a*xa(3)+b*xb(3))*p_inv + end subroutine !--- -subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) - implicit none + +subroutine gaussian_product_v(a, xa, LD_xa, b, xb, k, p, xp, n_points) + BEGIN_DOC + ! ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} + ! ! Using multiple A centers + ! END_DOC - integer, intent(in) :: n_points - double precision, intent(in) :: a,b ! Exponents - double precision, intent(in) :: xa(n_points,3),xb(3) ! Centers - double precision, intent(out) :: p ! New exponent - double precision, intent(out) :: xp(n_points,3) ! New center - double precision, intent(out) :: k(n_points) ! Constant + implicit none - double precision :: p_inv + integer, intent(in) :: LD_xa, n_points + double precision, intent(in) :: a, b ! Exponents + double precision, intent(in) :: xa(LD_xa,3), xb(3) ! Centers + double precision, intent(out) :: p ! New exponent + double precision, intent(out) :: xp(n_points,3) ! New center + double precision, intent(out) :: k(n_points) ! Constant + + integer :: ipoint + double precision :: p_inv + double precision :: xab(3), ab, ap, bp, bpxb(3) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - integer :: ipoint ASSERT (a>0.) ASSERT (b>0.) - double precision :: xab(3), ab, ap, bp, bpxb(3) - !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: xab - p = a+b p_inv = 1.d0/(a+b) ab = a*b*p_inv @@ -348,7 +361,7 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) bpxb(2) = bp*xb(2) bpxb(3) = bp*xb(3) - do ipoint=1,n_points + do ipoint = 1, n_points xab(1) = xa(ipoint,1)-xb(1) xab(2) = xa(ipoint,2)-xb(2) xab(3) = xa(ipoint,3)-xb(3) @@ -365,18 +378,19 @@ subroutine gaussian_product_v(a,xa,b,xb,k,p,xp,n_points) xp(ipoint,3) = ap*xa(ipoint,3)+bpxb(3) endif enddo -end subroutine +end subroutine gaussian_product_v +! --- +subroutine gaussian_product_x(a, xa, b, xb, k, p, xp) -subroutine gaussian_product_x(a,xa,b,xb,k,p,xp) - implicit none BEGIN_DOC ! Gaussian product in 1D. ! e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}^x e^{-p (x-x_P)^2} END_DOC + implicit none double precision , intent(in) :: a,b ! Exponents double precision , intent(in) :: xa,xb ! Centers double precision , intent(out) :: p ! New exponent @@ -625,16 +639,20 @@ subroutine recentered_poly2(P_new,x_A,x_P,a,P_new2,x_B,x_Q,b) do i = 21,b P_new2(i) = binom_func(b,b-i) * pows_b(b-i) enddo + end -!- -subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) - implicit none +! --- + +subroutine recentered_poly2_v(P_new, lda, x_A, LD_xA, x_P, a, P_new2, ldb, x_B, x_Q, b, n_points) + BEGIN_DOC ! 
Recenter two polynomials END_DOC - integer, intent(in) :: a(3),b(3), n_points, lda, ldb - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) + + implicit none + integer, intent(in) :: a(3), b(3), n_points, lda, ldb, LD_xA + double precision, intent(in) :: x_A(LD_xA,3), x_P(n_points,3), x_B(3), x_Q(n_points,3) double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,0:ldb,3) double precision :: binom_func integer :: i,j,k,l, minab(3), maxab(3),ipoint, xyz @@ -646,7 +664,6 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 if ((a(xyz)<0).or.(b(xyz)<0) ) cycle do ipoint=1,n_points @@ -698,27 +715,34 @@ subroutine recentered_poly2_v(P_new,lda,x_A,x_P,a,P_new2,ldb,x_B,x_Q,b,n_points) enddo enddo enddo -end +end subroutine recentered_poly2_v + +! --- + +subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q, n_points) -subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) - implicit none BEGIN_DOC ! Recenter two polynomials. Special case for b=(0,0,0) END_DOC - integer, intent(in) :: a(3), n_points, lda - double precision, intent(in) :: x_A(n_points,3),x_P(n_points,3),x_B(3),x_Q(n_points,3) - double precision, intent(out) :: P_new(n_points,0:lda,3),P_new2(n_points,3) - double precision :: binom_func - integer :: i,j,k,l, xyz, ipoint, maxab(3) - double precision, allocatable :: pows_a(:,:), pows_b(:,:) - double precision :: fa + + implicit none + integer, intent(in) :: a(3), n_points, lda, LD_xA + double precision, intent(in) :: x_A(LD_xA,3) + double precision, intent(in) :: x_B(3) + double precision, intent(in) :: x_P(n_points,3), x_Q(n_points,3) + double precision, intent(out) :: P_new(n_points,0:lda,3), P_new2(n_points,3) + integer :: i, j, k, l, xyz, ipoint, maxab(3) + double precision :: fa + double precision, allocatable :: pows_a(:,:), pows_b(:,:) + + double precision :: binom_func maxab(1:3) = max(a(1:3),(/0,0,0/)) allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) ) - do xyz=1,3 + do xyz = 1, 3 if (a(xyz)<0) cycle do ipoint=1,n_points pows_a(ipoint,0) = 1.d0 @@ -736,25 +760,25 @@ subroutine recentered_poly2_v0(P_new,lda,x_A,x_P,a,P_new2,x_B,x_Q,n_points) P_new (ipoint,0,xyz) = pows_a(ipoint,a(xyz)) P_new2(ipoint,xyz) = pows_b(ipoint,0) enddo - do i = 1,min(a(xyz),20) - fa = binom_transp(a(xyz)-i,a(xyz)) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 1, min(a(xyz), 20) + fa = binom_transp(a(xyz)-i, a(xyz)) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo - do i = 21,a(xyz) - fa = binom_func(a(xyz),a(xyz)-i) - do ipoint=1,n_points - P_new (ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) + do i = 21, a(xyz) + fa = binom_func(a(xyz), a(xyz)-i) + do ipoint = 1, n_points + P_new(ipoint,i,xyz) = fa * pows_a(ipoint,a(xyz)-i) enddo enddo enddo !xyz deallocate(pows_a, pows_b) -end -!-- +end subroutine recentered_poly2_v0 + !-- subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol) diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f index a62c657e..c797c87e 100644 --- a/src/utils/one_e_integration.irp.f +++ b/src/utils/one_e_integration.irp.f @@ -32,9 +32,8 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_ end -subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& - 
power_B,overlap_x,overlap_y,overlap_z,overlap,dim) - implicit none +subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim) + BEGIN_DOC !.. math:: ! @@ -42,7 +41,10 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' + + implicit none integer,intent(in) :: dim ! dimension maximum for the arrays representing the polynomials double precision,intent(in) :: A_center(3),B_center(3) ! center of the x1 functions double precision, intent(in) :: alpha,beta @@ -51,17 +53,18 @@ subroutine overlap_gaussian_xyz(A_center,B_center,alpha,beta,power_A,& double precision :: P_new(0:max_dim,3),P_center(3),fact_p,p double precision :: F_integral_tab(0:max_dim) integer :: iorder_p(3) - - call give_explicit_poly_and_gaussian(P_new,P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,dim) - if(fact_p.lt.1d-20)then - overlap_x = 1.d-10 - overlap_y = 1.d-10 - overlap_z = 1.d-10 - overlap = 1.d-10 - return - endif integer :: nmax double precision :: F_integral + + call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim) + if(fact_p.lt.1d-20)then + overlap_x = 1.d-10 + overlap_y = 1.d-10 + overlap_z = 1.d-10 + overlap = 1.d-10 + return + endif + nmax = maxval(iorder_p) do i = 0,nmax F_integral_tab(i) = F_integral(i,p) @@ -150,72 +153,74 @@ subroutine overlap_x_abs(A_center, B_center, alpha, beta, power_A, power_B, over overlap_x = factor * dx * overlap_x end - ! --- -subroutine overlap_gaussian_xyz_v(A_center,B_center,alpha,beta,power_A,& - power_B,overlap,dim, n_points) - implicit none +subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, power_B, overlap, n_points) + BEGIN_DOC !.. math:: ! - ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ + ! S_x = \int (x-A_x)^{a_x} exp(-\alpha(x-A_x)^2) (x-B_x)^{b_x} exp(-beta(x-B_x)^2) dx \\ ! S = S_x S_y S_z ! END_DOC + include 'constants.include.F' - integer,intent(in) :: dim, n_points - double precision,intent(in) :: A_center(n_points,3),B_center(3) ! center of the x1 functions - double precision, intent(in) :: alpha,beta - integer,intent(in) :: power_A(3), power_B(3) ! power of the x1 functions - double precision, intent(out) :: overlap(n_points) - double precision :: F_integral_tab(0:max_dim) - double precision :: p, overlap_x, overlap_y, overlap_z - double precision, allocatable :: P_new(:,:,:),P_center(:,:),fact_p(:), fact_pp(:), pp(:) - integer :: iorder_p(3), ipoint, ldp - integer :: nmax - double precision :: F_integral + + implicit none + + integer, intent(in) :: n_points + integer, intent(in) :: power_A(3), power_B(3) ! power of the x1 functions + double precision, intent(in) :: A_center(n_points,3), B_center(3) ! 
center of the x1 functions + double precision, intent(in) :: alpha, beta + double precision, intent(out) :: overlap(n_points) + + integer :: i + integer :: iorder_p(3), ipoint, ldp + integer :: nmax + double precision :: F_integral_tab(0:max_dim) + double precision :: p, overlap_x, overlap_y, overlap_z + double precision :: F_integral + double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:) ldp = maxval( power_A(1:3) + power_B(1:3) ) - allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points), & - fact_pp(n_points), pp(n_points)) - call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center,p,fact_p,iorder_p,alpha,beta,power_A,power_B,A_center,B_center,n_points) + allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points)) + + call give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, n_points, B_center, n_points) nmax = maxval(iorder_p) - do i=0, nmax + do i = 0, nmax F_integral_tab(i) = F_integral(i,p) enddo - integer :: i + do ipoint = 1, n_points - call gaussian_product_v(alpha,A_center,beta,B_center,fact_pp,pp,P_center,n_points) - - do ipoint=1,n_points - if(fact_p(ipoint).lt.1d-20)then + if(fact_p(ipoint) .lt. 1d-20) then overlap(ipoint) = 1.d-10 cycle endif overlap_x = P_new(ipoint,0,1) * F_integral_tab(0) - do i = 1,iorder_p(1) + do i = 1, iorder_p(1) overlap_x = overlap_x + P_new(ipoint,i,1) * F_integral_tab(i) enddo overlap_y = P_new(ipoint,0,2) * F_integral_tab(0) - do i = 1,iorder_p(2) + do i = 1, iorder_p(2) overlap_y = overlap_y + P_new(ipoint,i,2) * F_integral_tab(i) enddo overlap_z = P_new(ipoint,0,3) * F_integral_tab(0) - do i = 1,iorder_p(3) + do i = 1, iorder_p(3) overlap_z = overlap_z + P_new(ipoint,i,3) * F_integral_tab(i) enddo - overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_pp(ipoint) + overlap(ipoint) = overlap_x * overlap_y * overlap_z * fact_p(ipoint) enddo - deallocate(P_new, P_center, fact_p, pp, fact_pp) -end + deallocate(P_new, P_center, fact_p) + +end subroutine overlap_gaussian_xyz_v ! 
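The vectorized routines above (gaussian_product_v, give_explicit_poly_and_gaussian_v and overlap_gaussian_xyz_v) all rest on the 1D Gaussian product rule quoted in their BEGIN_DOC blocks: $e^{-a (x-x_A)^2} e^{-b (x-x_B)^2} = K_{ab}\, e^{-p (x-x_P)^2}$ with $p = a+b$, $x_P = (a x_A + b x_B)/p$ and $K_{ab} = e^{-\frac{ab}{p}(x_A-x_B)^2}$. A minimal standalone numerical check of that identity at one sample point; all values below are arbitrary and not taken from the code:

! Standalone sketch (not part of the patch): numerical check of the 1D
! Gaussian product rule used by gaussian_product / gaussian_product_v.
program gaussian_product_check
  implicit none
  double precision :: a, b, xa, xb, x
  double precision :: p, xp, k
  double precision :: lhs, rhs

  a  = 1.3d0    ! arbitrary exponents and centers
  b  = 0.7d0
  xa = -0.2d0
  xb =  0.5d0
  x  =  0.1d0   ! arbitrary evaluation point

  p  = a + b
  xp = (a*xa + b*xb) / p
  k  = dexp(-a*b/p * (xa - xb)**2)

  lhs = dexp(-a*(x - xa)**2) * dexp(-b*(x - xb)**2)
  rhs = k * dexp(-p*(x - xp)**2)

  print *, ' lhs  = ', lhs
  print *, ' rhs  = ', rhs
  print *, ' diff = ', lhs - rhs   ! should be at machine precision
end program gaussian_product_check

In gaussian_product_v the new exponent p stays a scalar because it depends only on a and b, while K and x_P are evaluated point by point over the array of A centers; this is why only k(n_points) and xp(n_points,3) carry the grid dimension.
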
--- diff --git a/src/utils/qsort.c b/src/utils/qsort.c deleted file mode 100644 index c011b35a..00000000 --- a/src/utils/qsort.c +++ /dev/null @@ -1,373 +0,0 @@ -/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ -#include -#include - -struct int16_t_comp { - int16_t x; - int32_t i; -}; - -int compare_int16_t( const void * l, const void * r ) -{ - const int16_t * restrict _l= l; - const int16_t * restrict _r= r; - if( *_l > *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct double_comp* A = malloc(isize * sizeof(struct double_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct float_comp* A = malloc(isize * sizeof(struct float_comp)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { - struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i *_r ) return 1; - if( *_l < *_r ) return -1; - return 0; -} - -void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { - struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); - if (A == NULL) return; - - for (int i=0 ; i> -""" -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("TYPE", 
typ).replace("_big", "") ) - print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -#+NAME: replaced_f2 -#+begin_src python :results output :noweb yes -data = """ -<> -""" -c1 = { - "int16_t": "i2", - "int32_t": "i", - "int64_t": "i8", - "double": "d", - "float": "" -} -c2 = { - "int16_t": "integer", - "int32_t": "integer", - "int64_t": "integer", - "double": "real", - "float": "real" -} - -for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) - print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) -#+end_src - -* Generated C file - -#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes -#include -#include -<> -#+END_SRC - -* Generated Fortran file - -#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes -module qsort_module - use iso_c_binding - - interface - <> - end interface - -end module qsort_module - -<> - -#+END_SRC - diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 deleted file mode 100644 index a72a4f9e..00000000 --- a/src/utils/qsort_module.f90 +++ /dev/null @@ -1,347 +0,0 @@ -module qsort_module - use iso_c_binding - - interface - - subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_c - - subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_c - - - - subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - end subroutine i2sort_big_c - - subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int16_t) :: A(isize) - end subroutine i2sort_noidx_big_c - - - - subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_c - - subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_c - - - - subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - end subroutine isort_big_c - - subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: 
isize - integer (c_int32_t) :: A(isize) - end subroutine isort_noidx_big_c - - - - subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_c - - subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_c - - - - subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - end subroutine i8sort_big_c - - subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer (c_int64_t) :: A(isize) - end subroutine i8sort_noidx_big_c - - - - subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_c - - subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_c - - - - subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - end subroutine dsort_big_c - - subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_double) :: A(isize) - end subroutine dsort_noidx_big_c - - - - subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") - use iso_c_binding - integer(c_int32_t), value :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_c - - subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") - use iso_c_binding - integer(c_int32_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_c - - - - subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") - use iso_c_binding - integer(c_int64_t), value :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - end subroutine sort_big_c - - subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") - use iso_c_binding - integer(c_int64_t), value :: isize - real (c_float) :: A(isize) - end subroutine sort_noidx_big_c - - - - end interface - -end module qsort_module - - -subroutine i2sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_c(A, iorder, isize) -end subroutine i2sort - -subroutine i2sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int16_t) :: A(isize) - call i2sort_noidx_c(A, isize) -end subroutine i2sort_noidx - - - -subroutine i2sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int16_t) :: A(isize) - call i2sort_big_c(A, iorder, isize) -end subroutine i2sort_big - -subroutine i2sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int16_t) :: A(isize) - call 
i2sort_noidx_big_c(A, isize) -end subroutine i2sort_noidx_big - - - -subroutine isort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_c(A, iorder, isize) -end subroutine isort - -subroutine isort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_c(A, isize) -end subroutine isort_noidx - - - -subroutine isort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int32_t) :: A(isize) - call isort_big_c(A, iorder, isize) -end subroutine isort_big - -subroutine isort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int32_t) :: A(isize) - call isort_noidx_big_c(A, isize) -end subroutine isort_noidx_big - - - -subroutine i8sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_c(A, iorder, isize) -end subroutine i8sort - -subroutine i8sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_c(A, isize) -end subroutine i8sort_noidx - - - -subroutine i8sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - integer (c_int64_t) :: A(isize) - call i8sort_big_c(A, iorder, isize) -end subroutine i8sort_big - -subroutine i8sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - integer (c_int64_t) :: A(isize) - call i8sort_noidx_big_c(A, isize) -end subroutine i8sort_noidx_big - - - -subroutine dsort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_c(A, iorder, isize) -end subroutine dsort - -subroutine dsort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_c(A, isize) -end subroutine dsort_noidx - - - -subroutine dsort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_double) :: A(isize) - call dsort_big_c(A, iorder, isize) -end subroutine dsort_big - -subroutine dsort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_double) :: A(isize) - call dsort_noidx_big_c(A, isize) -end subroutine dsort_noidx_big - - - -subroutine sort(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int32_t) :: isize - integer(c_int32_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_c(A, iorder, isize) -end subroutine sort - -subroutine sort_noidx(A, isize) - use iso_c_binding - use qsort_module - integer(c_int32_t) :: isize - real (c_float) :: A(isize) - call sort_noidx_c(A, isize) -end subroutine sort_noidx - - - -subroutine sort_big(A, iorder, isize) - use qsort_module - use iso_c_binding - integer(c_int64_t) :: isize - integer(c_int64_t) :: iorder(isize) - real (c_float) :: A(isize) - call sort_big_c(A, iorder, isize) -end subroutine sort_big - -subroutine sort_noidx_big(A, isize) - use iso_c_binding - use qsort_module - integer(c_int64_t) :: isize - real (c_float) :: A(isize) - call 
sort_noidx_big_c(A, isize) -end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index 089c3871..ff40263c 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,4 +1,222 @@ BEGIN_TEMPLATE + subroutine insertion_$Xsort (x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the insertion sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + $type :: xtmp + integer :: i, i0, j, jmax + + do i=2,isize + xtmp = x(i) + i0 = iorder(i) + j=i-1 + do while (j>0) + if ((x(j) <= xtmp)) exit + x(j+1) = x(j) + iorder(j+1) = iorder(j) + j=j-1 + enddo + x(j+1) = xtmp + iorder(j+1) = i0 + enddo + end subroutine insertion_$Xsort + + subroutine quick_$Xsort(x, iorder, isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the quicksort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + integer, external :: omp_get_num_threads + call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) + end + + recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) + implicit none + integer, intent(in) :: isize, first, last, level + integer,intent(inout) :: iorder(isize) + $type, intent(inout) :: x(isize) + $type :: c, tmp + integer :: itmp + integer :: i, j + + if(isize<2)return + + c = x( shiftr(first+last,1) ) + i = first + j = last + do + do while (x(i) < c) + i=i+1 + end do + do while (c < x(j)) + j=j-1 + end do + if (i >= j) exit + tmp = x(i) + x(i) = x(j) + x(j) = tmp + itmp = iorder(i) + iorder(i) = iorder(j) + iorder(j) = itmp + i=i+1 + j=j-1 + enddo + if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + else + if (first < i-1) then + call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) + endif + if (j+1 < last) then + call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) + endif + endif + end + + subroutine heap_$Xsort(x,iorder,isize) + implicit none + BEGIN_DOC + ! Sort array x(isize) using the heap sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + END_DOC + integer,intent(in) :: isize + $type,intent(inout) :: x(isize) + integer,intent(inout) :: iorder(isize) + + integer :: i, k, j, l, i0 + $type :: xtemp + + l = isize/2+1 + k = isize + do while (.True.) 
+ if (l>1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j1) then + l=l-1 + xtemp = x(l) + i0 = iorder(l) + else + xtemp = x(k) + i0 = iorder(k) + x(k) = x(1) + iorder(k) = iorder(1) + k = k-1 + if (k == 1) then + x(1) = xtemp + iorder(1) = i0 + exit + endif + endif + i=l + j = shiftl(l,1) + do while (j0_8) + if (x(j)<=xtmp) exit + x(j+1_8) = x(j) + iorder(j+1_8) = iorder(j) + j = j-1_8 + enddo + x(j+1_8) = xtmp + iorder(j+1_8) = i0 + enddo + + end subroutine insertion_$Xsort_big + subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -90,3 +565,223 @@ SUBST [ X, type ] END_TEMPLATE +BEGIN_TEMPLATE + +recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) + implicit none + + BEGIN_DOC + ! Sort integer array x(isize) using the radix sort algorithm. + ! iorder in input should be (1,2,3,...,isize), and in output + ! contains the new order of the elements. + ! iradix should be -1 in input. + END_DOC + integer*$int_type, intent(in) :: isize + integer*$int_type, intent(inout) :: iorder(isize) + integer*$type, intent(inout) :: x(isize) + integer, intent(in) :: iradix + integer :: iradix_new + integer*$type, allocatable :: x2(:), x1(:) + integer*$type :: i4 ! data type + integer*$int_type, allocatable :: iorder1(:),iorder2(:) + integer*$int_type :: i0, i1, i2, i3, i ! index type + integer*$type :: mask + integer :: err + !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 + + if (isize < 2) then + return + endif + + if (iradix == -1) then ! Sort Positive and negative + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + do i=1_$int_type,isize + if (x(i) < 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = -x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i2 + iorder(i1+i) = iorder2(i) + x(i1+i) = x2(i) + enddo + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i1 > 1_$int_type) then + call $Xradix_sort$big(x1,iorder1,i1,-2) + do i=1_$int_type,i1 + x(i) = -x1(1_$int_type+i1-i) + iorder(i) = iorder1(1_$int_type+i1-i) + enddo + endif + + if (i2>1_$int_type) then + call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) + endif + + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + return + + else if (iradix == -2) then ! Positive + + ! 
Find most significant bit + + i0 = 0_$int_type + i4 = maxval(x) + + iradix_new = max($integer_size-1-leadz(i4),1) + mask = ibset(0_$type,iradix_new) + + allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays' + stop + endif + + i1=1_$int_type + i2=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder1(i1) = iorder(i) + x1(i1) = x(i) + i1 = i1+1_$int_type + else + iorder2(i2) = iorder(i) + x2(i2) = x(i) + i2 = i2+1_$int_type + endif + enddo + i1=i1-1_$int_type + i2=i2-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder1(i) + x(i0+i) = x1(i) + enddo + i0 = i0+i1 + i3 = i0 + deallocate(x1,iorder1,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x1, iorder1' + stop + endif + + + do i=1_$int_type,i2 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + i0 = i0+i2 + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to deallocate arrays x2, iorder2' + stop + endif + + + if (i3>1_$int_type) then + call $Xradix_sort$big(x,iorder,i3,iradix_new-1) + endif + + if (isize-i3>1_$int_type) then + call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) + endif + + return + endif + + ASSERT (iradix >= 0) + + if (isize < 48) then + call insertion_$Xsort$big(x,iorder,isize) + return + endif + + + allocate(x2(isize),iorder2(isize),stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x1, iorder1' + stop + endif + + + mask = ibset(0_$type,iradix) + i0=1_$int_type + i1=1_$int_type + + do i=1_$int_type,isize + if (iand(mask,x(i)) == 0_$type) then + iorder(i0) = iorder(i) + x(i0) = x(i) + i0 = i0+1_$int_type + else + iorder2(i1) = iorder(i) + x2(i1) = x(i) + i1 = i1+1_$int_type + endif + enddo + i0=i0-1_$int_type + i1=i1-1_$int_type + + do i=1_$int_type,i1 + iorder(i0+i) = iorder2(i) + x(i0+i) = x2(i) + enddo + + deallocate(x2,iorder2,stat=err) + if (err /= 0) then + print *, irp_here, ': Unable to allocate arrays x2, iorder2' + stop + endif + + + if (iradix == 0) then + return + endif + + + if (i1>1_$int_type) then + call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) + endif + if (i0>1) then + call $Xradix_sort$big(x,iorder,i0,iradix-1) + endif + + end + +SUBST [ X, type, integer_size, is_big, big, int_type ] + i ; 4 ; 32 ; .False. ; ; 4 ;; + i8 ; 8 ; 64 ; .False. ; ; 4 ;; + i2 ; 2 ; 16 ; .False. ; ; 4 ;; + i ; 4 ; 32 ; .True. ; _big ; 8 ;; + i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; +END_TEMPLATE + + +
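The templated Fortran sorts added above (insertion_$Xsort, quick_$Xsort, heap_$Xsort and $Xradix_sort$big) replace the deleted C qsort wrappers and share one convention: iorder must be (1,2,...,isize) on input and holds, on output, the permutation that sorted x, so that companion arrays can be reordered afterwards; this is exactly how diag_mat_per_fock_degen reorders its eigenvectors after calling dsort. A minimal standalone sketch of that pattern, using a local insertion sort in place of the library dsort; the key and companion values are arbitrary:

! Standalone sketch (not part of the patch): the sort-plus-iorder pattern
! used by the templated sorts.  A local insertion sort stands in for dsort.
program iorder_pattern
  implicit none
  integer, parameter :: n = 5
  double precision   :: x(n), companion(n), xtmp
  integer            :: iorder(n), i, j, i0

  x         = [3.d0, 1.d0, 5.d0, 4.d0, 2.d0]        ! keys to sort
  companion = [30.d0, 10.d0, 50.d0, 40.d0, 20.d0]   ! data tied to the keys
  iorder    = [(i, i = 1, n)]                       ! must be (1,2,...,n) on input

  do i = 2, n                     ! insertion sort carrying iorder along
    xtmp = x(i)
    i0   = iorder(i)
    j = i - 1
    do while (j > 0)
      if (x(j) <= xtmp) exit
      x(j+1)      = x(j)
      iorder(j+1) = iorder(j)
      j = j - 1
    enddo
    x(j+1)      = xtmp
    iorder(j+1) = i0
  enddo

  do i = 1, n                     ! reorder the companion array with iorder
    print *, x(i), companion(iorder(i))
  enddo
end program iorder_pattern

In the patch itself this is the i_good = iorder(i) step after call dsort(eigval_unsrtd, iorder, n), which copies column iorder(i) of the unsorted eigenvectors into position i of the sorted leigvec / reigvec output.
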