From 354ba6cb284efcdc9b52db0c0cc45ef6403e4307 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 2 Dec 2022 18:35:03 +0100
Subject: [PATCH 01/68] working on test_int in tc_scf

---
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f |   4 +-
 src/ao_many_one_e_ints/listj1b.irp.f         |   2 +-
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f     | 111 +++++++++++++++++++
 src/non_h_ints_mu/j12_nucl_utils.irp.f       |  17 +++
 src/tc_scf/test_int.irp.f                    |  77 +++++++++++++
 5 files changed, 208 insertions(+), 3 deletions(-)
 create mode 100644 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
 create mode 100644 src/tc_scf/test_int.irp.f

diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index cdc86456..c9c3b259 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -344,9 +344,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            !if(expo_coef_1s .gt. 80.d0) cycle
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            !if(dabs(coef_tmp) .lt. 1d-10) cycle
+            if(dabs(coef_tmp) .lt. 1d-10) cycle
 
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
index 1178cc31..32308f59 100644
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -168,7 +168,7 @@ END_PROVIDER
 
     do j = 1, nucl_num
       tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-      print*,List_all_comb_b3(j,i),j1b_pen(j)
+!      print*,List_all_comb_b3(j,i),j1b_pen(j)
       List_all_comb_b3_expo(i)   += tmp_alphaj
       List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
       List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..22e2fe3d
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,111 @@
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint) ! r is filled but not used in this provider
+  weight = final_weight_at_r_vector(ipoint) ! weight applied to the contribution of grid point ipoint
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight ! |AO_j * AO_i| at this point, weighted
+    ao_overlap_abs_grid(j,i) += contrib 
+   enddo
+  enddo
+ enddo
+ ! Result: ao_overlap_abs_grid(j,i) = sum over grid points of |AO_j(r) AO_i(r)| * weight(r)
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|  (both sums taken on the final grid)
+!
+! if ao_overlap_abs_grid(j,i) <= 1.d-10 no division is performed: the un-normalized numerator is kept as is.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight ! weighted |phi_j phi_i| at this point
+    do m = 1, 3
+     ao_prod_center(m,j,i) += contrib * r(m)
+    enddo
+   enddo
+  enddo
+ enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   endif
+  enddo
+ enddo
+ ! The second loop nest normalizes by ao_overlap_abs_grid(j,i) only where it exceeds 1.d-10.
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_sigma(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+!
+! where <|i|x|j|>, <|i|y|j|>, <|i|z|j|> are ao_prod_center(1:3,j,i); the division is skipped when ao_overlap_abs_grid(j,i) <= 1.d-10
+!
+! gives you a precise idea of the spatial extension of the distribution phi_i(r) phi_j(r)
+ END_DOC
+ ao_prod_sigma = 0.d0
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight ! weighted |phi_j phi_i| at this point
+    contrib_x2 = 0.d0 ! squared distance between r and ao_prod_center(:,j,i), accumulated below
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
+    enddo
+    contrib_x2 = dsqrt(contrib_x2) ! now the distance itself, not its square
+    ao_prod_sigma(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+ ! Normalize by the absolute overlap wherever it is larger than 1.d-10.
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+   endif
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = Euclidean distance between ao_prod_center(1:3,j,i), the center of |phi_i(r) phi_j(r)|, and the grid point final_grid_points(1:3,ipoint)
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    distance = 0.d0 ! holds the squared distance until the dsqrt below
+    do m = 1, 3
+     distance += (ao_prod_center(m,j,i) - r(m))*(ao_prod_center(m,j,i) - r(m))
+    enddo
+    distance = dsqrt(distance)
+    ao_prod_dist_grid(j,i,ipoint)  = distance
+   enddo
+  enddo
+ enddo
+ ! Every (j,i,ipoint) element is assigned above, so no prior zero-initialization is needed.
+END_PROVIDER 
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
index f3b68f43..a515e0b8 100644
--- a/src/non_h_ints_mu/j12_nucl_utils.irp.f
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -237,6 +237,23 @@ end function j12_mu
 
 ! ---
 
+double precision function j12_mu_r12(r12)
+  ! Returns 0.5 * r12 * (1 - erf(mu_erf*r12)) - inv_sq_pi_2 * exp(-(mu_erf*r12)**2) / mu_erf for the scalar r12.
+  include 'constants.include.F'
+  ! NOTE(review): inv_sq_pi_2 is not declared here; presumably it comes from constants.include.F - confirm.
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: mu_r12
+  ! mu_erf is not a dummy argument; other blocks request it with "provide mu_erf".
+  mu_r12 = mu_erf * r12
+  ! The result is divided by mu_erf, so mu_erf must be non-zero.
+  j12_mu_r12 = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu_r12
+
+! ---
+
 double precision function j12_mu_gauss(r1, r2)
 
   implicit none
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..29fcb2a9
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,77 @@
+program test_ints
+
+  BEGIN_DOC
+! Debug driver: selects a small grid (my_n_pt_r_grid = 10, my_n_pt_a_grid = 26), touches the grid settings and calls routine.
+  END_DOC
+
+  implicit none
+
+  print *, 'starting ...'
+
+  my_grid_becke  = .True.
+!  my_n_pt_r_grid = 30
+!  my_n_pt_a_grid = 50
+  my_n_pt_r_grid = 10 ! small grid for quick debug
+  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call routine 
+
+end
+
+subroutine routine
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+ ! array     : grid sum of int2_u_grad1u_j1b2_test(j,i,ipoint) * AO_k(r) * AO_l(r) * weight
+ ! array_ref : same grid sum built from int2_u_grad1u_j1b2, used as the reference
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0 ! sum of |diff|/|ref|, restricted to elements with |ref| > 1.d-10
+ accu_abs   = 0.d0 ! sum of |diff| over all elements
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k)) ! absolute deviation from the reference
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+ ! Both sums are divided by ao_num**4, including accu_relat, which skips elements with a negligible reference.
+
+end

From 5bd7b7ca6b9c5b1e8ab7e6f044f87546663572e0 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 5 Dec 2022 12:49:38 +0100
Subject: [PATCH 02/68] added grad2_jmu_manu.irp.f grad_lapl_jmu_manu.irp.f
 listj1b_sorted.irp.f

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 222 ++++++++++++++
 .../grad_lapl_jmu_manu.irp.f                  | 287 ++++++++++++++++++
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   | 188 ++++++++++++
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      |   5 +
 src/tc_scf/test_int.irp.f                     | 222 +++++++++++++-
 5 files changed, 921 insertions(+), 3 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b_sorted.irp.f

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..65966c81
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,222 @@
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! Screened evaluation: AO pairs with ao_overlap_abs_grid(j,i) < 1.d-10 and negligible fit terms are skipped.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2); the former exponent (3/2) was an integer division, i.e. 1
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          sigma_ij, beta_ij, factor_ij_1s,center_ij_1s, dist_ij_ipoint,     &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
+ !$OMP          final_grid_points, n_max_fit_slat,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
+ !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        dist_ij_ipoint = ao_prod_dist_grid(j,i,ipoint) ! distance to the grid point for the distribution |chi_i(r)chi_j(r)|
+        sigma_ij = ao_prod_sigma(j,i)                  ! typical spatial extension of the distribution |chi_i(r)chi_j(r)|
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_all_comb_b3_size
+
+          coef        = List_all_comb_b3_coef  (i_1s)
+          beta        = List_all_comb_b3_expo  (i_1s)
+!          if(beta.gt.1.d3)cycle
+          if(dabs(coef).lt.1.d-10)cycle
+          B_center(1) = List_all_comb_b3_cent(1,i_1s)
+          B_center(2) = List_all_comb_b3_cent(2,i_1s)
+          B_center(3) = List_all_comb_b3_cent(3,i_1s)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+          sigma_ij = 1.d0/ao_prod_sigma(j,i) ! restart from ao_prod_sigma so every i_1s iteration gets 0.5/sigma^2
+          sigma_ij *= sigma_ij
+          sigma_ij *= 0.5d0
+          double precision :: beta_ij, factor_ij_1s, center_ij_1s(3)
+!          call gaussian_product(sigma_ij,ao_prod_center(1:3,j,i),beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!          if(factor_ij_1s*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+!          if(factor_ij_1s*dsqpi_3_2*(beta_ij)**(-1.5d0)*ao_overlap_abs_grid(j,i).lt.1.d-20)cycle
+
+          do i_fit = 1, n_max_fit_slat
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint) ! fill the j < i triangle by symmetry
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! Variant that loops over the per-pair thresholded lists List_comb_thr_b3_* instead of the full combination list.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2); the former exponent (3/2) was an integer division, i.e. 1
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+
+  int2_u_grad1u_j1b2_test_2 = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr, &
+ !$OMP          final_grid_points, n_max_fit_slat,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test_2)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_b3_size_thr(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+          ! The screening below weights each fit term by (pi/beta_ij)^(3/2); real exponents avoid integer division (-3/2 = -1).
+          do i_fit = 1, n_max_fit_slat
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test_2(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test_2(j,i,ipoint) = int2_u_grad1u_j1b2_test_2(i,j,ipoint) ! fill the j < i triangle by symmetry
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test_2', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..b8c0801a
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,287 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1) / |r - R|
+  ! Evaluated with the per-pair thresholded lists List_comb_thr_b2_*; pairs with ao_overlap_abs_grid(j,i) < 1.d-20 are skipped.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2) (was **(3/2), an integer division); only listed in SHARED below
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_b2_size_thr(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! same integral routine called twice: once with mu_erf, once with 1.d+9 in the mu slot
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint) ! fill the j < i triangle by symmetry
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint
+  double precision :: wall0, wall1
+  ! Reorders x_v_ij_erf_rk_cst_mu_tmp_j1b_test from (3,j,i,ipoint) to (j,i,ipoint,3).
+  call wall_time(wall0)
+  ! The source is the _test tmp provider (it used to be the non-test x_v_ij_erf_rk_cst_mu_tmp_j1b).
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, ao_num
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2) (was **(3/2), an integer division); only listed in SHARED below
+
+  call wall_time(wall0)
+
+  x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z)                                                  & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_b2_size_thr(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! same routine called twice: once with mu_erf, once with 1.d+9 in the mu slot; the difference is accumulated
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint) = tmp_x
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint) = tmp_y
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! fill the j < i triangle by symmetry
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  ! u(mu, r12) enters through the fit coef_gauss_j_mu_x / expo_gauss_j_mu_x, summed over n_max_fit_slat terms.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2) (was **(3/2), an integer division); only listed in SHARED below
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, n_max_fit_slat,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_b2_size_thr,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_b2_size_thr(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          do i_fit = 1, n_max_fit_slat
+
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit ! combined prefactor, used only for the screening test on the next line
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint) ! fill the j < i triangle by symmetry
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..58c77f5c
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,188 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_b2_size_thr, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_b2_size_thr]
+ ! List_comb_b2_size_thr(j,i): number of terms of the List_all_comb_b2 expansion kept for the AO pair (j,i).
+ ! A term is kept when |coef| >= 1.d-10 and |coef| * int_abs > thr_keep, where int_abs is the grid sum of
+ ! |ao_i(r) ao_j(r)| * exp(-expo * |r - center|^2) * final_weight_at_r_vector(r).
+ ! max_List_comb_b2_size_thr: largest entry of List_comb_b2_size_thr.
+ implicit none
+ integer          :: i, j, i_cmb, i_grid
+ double precision :: c_1s, b_1s, cen_1s(3), int_abs, thr_keep
+ double precision :: r_pt(3), w_pt, d2_pt
+
+ thr_keep = 1.d-10
+ List_comb_b2_size_thr = 0
+ ! pairs with j >= i are evaluated explicitly
+ do i = 1, ao_num
+  do j = i, ao_num
+   do i_cmb = 1, List_all_comb_b2_size
+     c_1s = List_all_comb_b2_coef(i_cmb)
+     if(dabs(c_1s) < 1.d-10) cycle
+     b_1s        = List_all_comb_b2_expo(i_cmb)
+     cen_1s(1:3) = List_all_comb_b2_cent(1:3,i_cmb)
+     int_abs = 0.d0
+     do i_grid = 1, n_points_final_grid
+      r_pt(1:3) = final_grid_points(1:3,i_grid)
+      w_pt      = final_weight_at_r_vector(i_grid)
+      d2_pt  = ( cen_1s(1) - r_pt(1) )*( cen_1s(1) - r_pt(1) )
+      d2_pt += ( cen_1s(2) - r_pt(2) )*( cen_1s(2) - r_pt(2) )
+      d2_pt += ( cen_1s(3) - r_pt(3) )*( cen_1s(3) - r_pt(3) )
+      int_abs += dabs(aos_in_r_array_transp(i_grid,i) * aos_in_r_array_transp(i_grid,j))*dexp(-b_1s*d2_pt) * w_pt
+     enddo
+     if(dabs(c_1s)*dabs(int_abs) > thr_keep) then
+      List_comb_b2_size_thr(j,i) += 1
+     endif
+   enddo
+  enddo
+ enddo
+ ! the remaining pairs (j < i) are copied from their transposed entry
+ do i = 2, ao_num
+  List_comb_b2_size_thr(1:i-1,i) = List_comb_b2_size_thr(i,1:i-1)
+ enddo
+ max_List_comb_b2_size_thr = maxval(List_comb_b2_size_thr)
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_b2_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_b2_size_thr ,ao_num, ao_num)]
+ implicit none ! per AO pair (j,i): coef / expo / center / screening integral of the List_all_comb_b2 terms that pass the threshold
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-10 ! a term is stored when |coef| * int_j1b > thr
+ ao_abs_comb_b2_j1b = 10000000.d0 ! default value of every slot that is not overwritten below
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0 ! number of terms stored so far for the pair (j,i)
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-10)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0 ! grid sum of |ao_i ao_j| * exp(-beta * dist) * weight
+     do ipoint = 1, n_points_final_grid
+      r(1:3) = final_grid_points(1:3,ipoint)
+      weight = final_weight_at_r_vector(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+ ! copy coef / expo / center from (i,j) to (j,i) for j < i; ao_abs_comb_b2_j1b is not copied and keeps its default there
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_b2_size_thr(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+    enddo
+  enddo
+ enddo
+ 
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ integer, List_comb_b3_size_thr, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_b3_size_thr]
+ ! List_comb_b3_size_thr(j,i): number of terms of the List_all_comb_b3 expansion kept for the AO pair (j,i).
+ ! A term is kept when |coef| >= thr_keep and |coef| * int_abs > thr_keep, where int_abs is the grid sum of
+ ! |ao_i(r) ao_j(r)| * exp(-expo * |r - center|^2) * final_weight_at_r_vector(r).
+ ! max_List_comb_b3_size_thr: largest entry of List_comb_b3_size_thr.
+ implicit none
+ integer          :: i, j, i_cmb, i_grid
+ double precision :: c_1s, b_1s, cen_1s(3), int_abs, thr_keep
+ double precision :: r_pt(3), w_pt, d2_pt
+
+ thr_keep = 1.d-14
+ List_comb_b3_size_thr = 0
+ ! pairs with j >= i are evaluated explicitly
+ do i = 1, ao_num
+  do j = i, ao_num
+   do i_cmb = 1, List_all_comb_b3_size
+     c_1s = List_all_comb_b3_coef(i_cmb)
+     if(dabs(c_1s) < thr_keep) cycle
+     b_1s        = List_all_comb_b3_expo(i_cmb)
+     cen_1s(1:3) = List_all_comb_b3_cent(1:3,i_cmb)
+     int_abs = 0.d0
+     do i_grid = 1, n_points_final_grid
+      r_pt(1:3) = final_grid_points(1:3,i_grid)
+      w_pt      = final_weight_at_r_vector(i_grid)
+      d2_pt  = ( cen_1s(1) - r_pt(1) )*( cen_1s(1) - r_pt(1) )
+      d2_pt += ( cen_1s(2) - r_pt(2) )*( cen_1s(2) - r_pt(2) )
+      d2_pt += ( cen_1s(3) - r_pt(3) )*( cen_1s(3) - r_pt(3) )
+      int_abs += dabs(aos_in_r_array_transp(i_grid,i) * aos_in_r_array_transp(i_grid,j))*dexp(-b_1s*d2_pt) * w_pt
+     enddo
+     if(dabs(c_1s)*dabs(int_abs) > thr_keep) then
+      List_comb_b3_size_thr(j,i) += 1
+     endif
+   enddo
+  enddo
+ enddo
+ ! the remaining pairs (j < i) are copied from their transposed entry
+ do i = 2, ao_num
+  List_comb_b3_size_thr(1:i-1,i) = List_comb_b3_size_thr(i,1:i-1)
+ enddo
+ max_List_comb_b3_size_thr = maxval(List_comb_b3_size_thr)
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_b3_size_thr,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_b3_size_thr ,ao_num, ao_num)]
+ implicit none ! per AO pair (j,i): coef / expo / center / screening integral of the List_all_comb_b3 terms that pass the threshold
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-14 ! used both as the |coef| cut-off and as the |coef| * int_j1b cut-off
+ ao_abs_comb_b3_j1b = 10000000.d0 ! default value of every slot that is not overwritten below
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0 ! number of terms stored so far for the pair (j,i)
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     if(dabs(coef).lt.thr)cycle
+     beta        = List_all_comb_b3_expo  (i_1s)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     int_j1b = 0.d0 ! grid sum of |ao_i ao_j| * exp(-beta * dist) * weight
+     do ipoint = 1, n_points_final_grid
+      r(1:3) = final_grid_points(1:3,ipoint)
+      weight = final_weight_at_r_vector(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num ! copy coef / expo / center from (i,j) to (j,i) for j < i; ao_abs_comb_b3_j1b is not copied
+  do j = 1, i-1
+    do icount = 1, List_comb_b3_size_thr(j,i)
+     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+     List_comb_thr_b3_cent(1,icount,j,i) = List_comb_thr_b3_cent(1,icount,i,j)
+     List_comb_thr_b3_cent(2,icount,j,i) = List_comb_thr_b3_cent(2,icount,i,j)
+     List_comb_thr_b3_cent(3,icount,j,i) = List_comb_thr_b3_cent(3,icount,i,j)
+    enddo
+  enddo
+ enddo
+ 
+ 
+END_PROVIDER 
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
index 22e2fe3d..1af34d74 100644
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -109,3 +109,8 @@ BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_
 
 END_PROVIDER 
 
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 29fcb2a9..69953f02 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -14,11 +14,40 @@ program test_ints
   my_n_pt_r_grid = 10 ! small grid for quick debug
   my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
- call routine 
+! call routine_int2_u_grad1u_j1b2 
+! call routine_v_ij_erf_rk_cst_mu_j1b
+! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+ call routine_v_ij_u_cst_mu_j1b
+! call routine_test_j1b
 
 end
 
-subroutine routine
+subroutine routine_test_j1b
+
+ BEGIN_DOC
+ ! Debug helper: prints the centers stored in List_comb_thr_b3_cent for every AO pair,
+ ! then the b2 and b3 maximum retained sizes next to the full list sizes.
+ END_DOC
+
+ implicit none
+ integer :: i, j, icount
+
+ ! centers of the retained b3 terms, pair by pair
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do icount = 1, List_comb_b3_size_thr(j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+   enddo
+  enddo
+ enddo
+
+ ! one summary line per list; each label names the quantity actually printed
+ print*,'max_List_comb_b2_size_thr = ',max_List_comb_b2_size_thr,List_all_comb_b2_size
+ print*,'max_List_comb_b3_size_thr = ',max_List_comb_b3_size_thr,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
  implicit none
  integer :: i,j,ipoint,k,l
  double precision :: weight,accu_relat, accu_abs, contrib
@@ -47,7 +76,15 @@ subroutine routine
    do l = 1, ao_num
     do i = 1, ao_num
      do j = 1, ao_num
-      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test_2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      if(dabs(int2_u_grad1u_j1b2(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_u_grad1u_j1b2_test_2(j,i,ipoint)-int2_u_grad1u_j1b2(j,i,ipoint)).gt.1.d-6)then
+!        print*,int2_u_grad1u_j1b2(j,i,ipoint), int2_u_grad1u_j1b2_test_2(j,i,ipoint),dabs(int2_u_grad1u_j1b2_test_2(j,i,ipoint)-int2_u_grad1u_j1b2(j,i,ipoint))
+!        print*,i,j
+!        print*,final_grid_points(:,i)
+!       stop
+!       endif
+!      endif
       array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
      enddo
     enddo
@@ -74,4 +111,183 @@ subroutine routine
 
   
 
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+
+ BEGIN_DOC
+ ! Compares v_ij_erf_rk_cst_mu_j1b_test against v_ij_erf_rk_cst_mu_j1b.
+ !
+ ! Both are contracted on the grid with aos_in_r_array(k,:) * aos_in_r_array(l,:) * final_weight_at_r_vector,
+ ! then the summed absolute deviation and the summed relative deviation (restricted to
+ ! reference entries larger than 1.d-10 in magnitude) are printed, each divided by ao_num**4.
+ END_DOC
+
+ implicit none
+ integer                       :: i, j, k, l, ipoint
+ double precision              :: w_pt, abs_diff, sum_abs, sum_rel
+ double precision, allocatable :: tens_test(:,:,:,:), tens_ref(:,:,:,:)
+
+ allocate( tens_test(ao_num,ao_num,ao_num,ao_num), tens_ref(ao_num,ao_num,ao_num,ao_num) )
+ tens_test = 0.d0
+ tens_ref  = 0.d0
+
+ ! grid contraction of the tested and of the reference quantity
+ do ipoint = 1, n_points_final_grid
+  w_pt = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      tens_test(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+      tens_ref(j,i,l,k)  += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+ ! accumulate the deviations element by element
+ sum_abs = 0.d0
+ sum_rel = 0.d0
+ do k = 1, ao_num
+  do l = 1, ao_num
+   do i = 1, ao_num
+    do j = 1, ao_num
+     abs_diff = dabs(tens_test(j,i,l,k) - tens_ref(j,i,l,k))
+     sum_abs += abs_diff
+     if(dabs(tens_ref(j,i,l,k)) > 1.d-10) then
+      sum_rel += abs_diff/dabs(tens_ref(j,i,l,k))
+     endif
+    enddo
+   enddo
+  enddo
+ enddo
+
+ print*,'accu_abs   = ',sum_abs/dble(ao_num)**4
+ print*,'accu_relat = ',sum_rel/dble(ao_num)**4
+
+ deallocate(tens_test, tens_ref)
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+
+ BEGIN_DOC
+ ! Compares x_v_ij_erf_rk_cst_mu_tmp_j1b_test against x_v_ij_erf_rk_cst_mu_tmp_j1b.
+ !
+ ! Both are contracted on the grid, summing over m = 1..3, with
+ ! aos_grad_in_r_array_transp(m,k,:) * aos_in_r_array(l,:) * final_weight_at_r_vector, then the summed absolute
+ ! and relative deviations (reference entries above 1.d-10 only) are printed, each divided by ao_num**4.
+ END_DOC
+
+ implicit none
+ integer                       :: i, j, k, l, m, ipoint
+ double precision              :: w_pt, abs_diff, sum_abs, sum_rel
+ double precision, allocatable :: tens_test(:,:,:,:), tens_ref(:,:,:,:)
+
+ allocate( tens_test(ao_num,ao_num,ao_num,ao_num), tens_ref(ao_num,ao_num,ao_num,ao_num) )
+ tens_test = 0.d0
+ tens_ref  = 0.d0
+
+ ! grid contraction of the tested and of the reference quantity
+ do ipoint = 1, n_points_final_grid
+  w_pt = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       tens_test(j,i,l,k) += x_v_ij_erf_rk_cst_mu_tmp_j1b_test(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+       tens_ref(j,i,l,k)  += x_v_ij_erf_rk_cst_mu_tmp_j1b(m,j,i,ipoint)      * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+ ! accumulate the deviations element by element
+ sum_abs = 0.d0
+ sum_rel = 0.d0
+ do k = 1, ao_num
+  do l = 1, ao_num
+   do i = 1, ao_num
+    do j = 1, ao_num
+     abs_diff = dabs(tens_test(j,i,l,k) - tens_ref(j,i,l,k))
+     sum_abs += abs_diff
+     if(dabs(tens_ref(j,i,l,k)) > 1.d-10) then
+      sum_rel += abs_diff/dabs(tens_ref(j,i,l,k))
+     endif
+    enddo
+   enddo
+  enddo
+ enddo
+
+ print*,'accu_abs   = ',sum_abs/dble(ao_num)**4
+ print*,'accu_relat = ',sum_rel/dble(ao_num)**4
+
+ deallocate(tens_test, tens_ref)
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b
+
+ BEGIN_DOC
+ ! Compares v_ij_u_cst_mu_j1b_test against v_ij_u_cst_mu_j1b.
+ !
+ ! Both are contracted on the grid with aos_in_r_array(k,:) * aos_in_r_array(l,:) * final_weight_at_r_vector,
+ ! then the summed absolute deviation and the summed relative deviation (restricted to
+ ! reference entries larger than 1.d-10 in magnitude) are printed, each divided by ao_num**4.
+ END_DOC
+
+ implicit none
+ integer                       :: i, j, k, l, ipoint
+ double precision              :: w_pt, abs_diff, sum_abs, sum_rel
+ double precision, allocatable :: tens_test(:,:,:,:), tens_ref(:,:,:,:)
+
+ allocate( tens_test(ao_num,ao_num,ao_num,ao_num), tens_ref(ao_num,ao_num,ao_num,ao_num) )
+ tens_test = 0.d0
+ tens_ref  = 0.d0
+
+ ! grid contraction of the tested and of the reference quantity
+ do ipoint = 1, n_points_final_grid
+  w_pt = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      tens_test(j,i,l,k) += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+      tens_ref(j,i,l,k)  += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * w_pt
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+
+ ! accumulate the deviations element by element
+ sum_abs = 0.d0
+ sum_rel = 0.d0
+ do k = 1, ao_num
+  do l = 1, ao_num
+   do i = 1, ao_num
+    do j = 1, ao_num
+     abs_diff = dabs(tens_test(j,i,l,k) - tens_ref(j,i,l,k))
+     sum_abs += abs_diff
+     if(dabs(tens_ref(j,i,l,k)) > 1.d-10) then
+      sum_rel += abs_diff/dabs(tens_ref(j,i,l,k))
+     endif
+    enddo
+   enddo
+  enddo
+ enddo
+
+ print*,'accu_abs   = ',sum_abs/dble(ao_num)**4
+ print*,'accu_relat = ',sum_rel/dble(ao_num)**4
+
+ deallocate(tens_test, tens_ref)
+
 end

From f2c3c729781a3c82562f46d7cc0255b282567baa Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Wed, 7 Dec 2022 11:26:34 +0100
Subject: [PATCH 03/68] v0 of DIIS & level shift

---
 src/bi_ortho_mos/bi_density.irp.f    |   2 +-
 src/bi_ortho_mos/mos_rl.irp.f        |  47 ++++
 src/tc_keywords/EZFIO.cfg            |  24 ++
 src/tc_scf/diago_bi_ort_tcfock.irp.f |  39 ++-
 src/tc_scf/diis_tcscf.irp.f          | 181 +++++++++++++
 src/tc_scf/fock_tc.irp.f             |  40 ++-
 src/tc_scf/rh_tcscf.irp.f            | 367 +++++++++++++++++++++++++++
 src/tc_scf/tc_scf.irp.f              |  24 +-
 src/tc_scf/tc_scf_dm.irp.f           |  34 ++-
 src/tc_scf/tc_scf_energy.irp.f       |  12 +-
 src/tc_scf/tc_scf_utils.irp.f        |   1 +
 11 files changed, 724 insertions(+), 47 deletions(-)
 create mode 100644 src/tc_scf/diis_tcscf.irp.f
 create mode 100644 src/tc_scf/rh_tcscf.irp.f

diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
index 947be870..0de8ce69 100644
--- a/src/bi_ortho_mos/bi_density.irp.f
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -22,7 +22,7 @@ BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
 !
 ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
  END_DOC
-  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0               &
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
 END_PROVIDER
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
index 034a436e..9e3ed358 100644
--- a/src/bi_ortho_mos/mos_rl.irp.f
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -37,6 +37,52 @@ end subroutine ao_to_mo_bi_ortho
 
 ! ---
 
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  ! Back-transformation of a matrix from the bi-orthonormal MO basis to the AO basis.
+  !
+  ! mo_l_coef.T x     A_ao   x mo_r_coef = A_mo
+  ! mo_l_coef.T x ao_overlap x mo_r_coef =  I
+  !
+  ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo        ! leading dimensions of A_ao and A_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)   ! input matrix, mo_num x mo_num block is read
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)   ! output matrix, ao_num x ao_num block is written
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:)
+
+  ! tmp_1 = ao_overlap x mo_r_coef
+  allocate( tmp_1(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! tmp_2 = (ao_overlap x mo_r_coef) x A_mo ; tmp_2 needs its own allocation here
+  allocate( tmp_2(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
+            , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
+            , 0.d0, tmp_2, size(tmp_2, 1) )
+  
+  ! tmp_1 = ao_overlap x mo_l_coef (tmp_1 is reused)
+  tmp_1 = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! A_ao = tmp_2 x tmp_1.T : (ao_num x mo_num) x (mo_num x ao_num), so M = N = ao_num and K = mo_num
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
+            , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
+            , 0.d0, A_ao, LDA_ao )
+  
+  deallocate(tmp_1, tmp_2)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
 BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
 
   BEGIN_DOC
@@ -175,3 +221,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index 1afa26e9..8db73f9a 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -136,3 +136,27 @@ doc: nb of Gaussians used to fit Jastrow fcts
 interface: ezfio,provider,ocaml
 default: 6
 
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: DIIS
+
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
index 856b7382..29ca0efe 100644
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -1,3 +1,5 @@
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
@@ -9,32 +11,50 @@
 
   implicit none
   integer                       :: n_real_tc 
-  integer                       :: i, k, l
+  integer                       :: i, j, k, l
   double precision              :: accu_d, accu_nd, accu_tmp
   double precision              :: thr_d, thr_nd
   double precision              :: norm
   double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
 
   thr_d  = 1d-6
   thr_nd = 1d-6
 
-  allocate( eigval_right_tmp(mo_num) )
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
 
   PROVIDE Fock_matrix_tc_mo_tot
 
-  call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo       & 
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thr_d, thr_nd           &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
                      , n_real_tc, eigval_right_tmp )
+
   !if(max_ov_tc_scf)then
-  ! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
+  ! call non_hrmt_fock_mat( mo_num, F_tmp, thr_d, thr_nd        &
   !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
   !                    , n_real_tc, eigval_right_tmp )
   !else 
-  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot &
+  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
   !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
   !                    , n_real_tc, eigval_right_tmp )
   !endif
 
+  deallocate(F_tmp)
+
+
 !  if(n_real_tc .ne. mo_num)then
 !   print*,'n_real_tc ne mo_num ! ',n_real_tc
 !   stop
@@ -42,9 +62,12 @@
 
   eigval_fock_tc_mo = eigval_right_tmp
 !  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
-!  do i = 1, mo_num
+!  do i = 1, elec_alpha_num
 !    print*, i, eigval_fock_tc_mo(i)
 !  enddo
+!  do i = elec_alpha_num+1, mo_num 
+!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
+!  enddo
 !  deallocate( eigval_right_tmp )
 
   ! L.T x R 
@@ -102,6 +125,8 @@
  
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..cf339175
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,181 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ]
+
+  BEGIN_DOC
+  ! threshold_DIIS_TCSCF, or dsqrt(thresh_tcscf) when threshold_DIIS_TCSCF is exactly 0.
+  END_DOC
+
+  implicit none
+
+  threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF
+  if(threshold_DIIS_TCSCF == 0.d0) threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf)
+  ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0)
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! AO-basis matrix built from the first elec_alpha_num columns of the MO coefficients:
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  END_DOC
+
+  implicit none
+  integer :: ld_r, ld_l, ld_q
+
+  ld_r = size(mo_r_coef, 1)
+  ld_l = size(mo_l_coef, 1)
+  ld_q = size(Q_alpha  , 1)
+  call dgemm('N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0, mo_r_coef, ld_r, mo_l_coef, ld_l, 0.d0, Q_alpha, ld_q)
+
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! AO-basis matrix built from the first elec_beta_num columns of the MO coefficients:
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  END_DOC
+
+  implicit none
+  integer :: ld_r, ld_l, ld_q
+
+  ld_r = size(mo_r_coef, 1)
+  ld_l = size(mo_l_coef, 1)
+  ld_q = size(Q_beta   , 1)
+  call dgemm('N', 'T', ao_num, ao_num, elec_beta_num, 1.d0, mo_r_coef, ld_r, mo_l_coef, ld_l, 0.d0, Q_beta, ld_q)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_matrix = Q_alpha + Q_beta (evaluated as Q_alpha + Q_alpha when elec_alpha_num == elec_beta_num),
+  ! i.e. in the closed-shell case Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |     
+  !                        | 0   otherwise
+  !
+  ! The DIIS error is defined as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      At convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0  
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+
+  if(elec_alpha_num /= elec_beta_num) then
+    Q_matrix = Q_alpha + Q_beta
+  else
+    Q_matrix = Q_alpha + Q_alpha
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! FQS_SQF_ao = F x Q x S - S x Q x F   with F = Fock_matrix_tc_ao_tot, Q = Q_matrix, S = ao_overlap
+  END_DOC
+
+  implicit none
+  double precision, allocatable :: buf_ao(:,:)
+  allocate( buf_ao(ao_num,ao_num) )
+
+  ! buf_ao = F x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                             &
+            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, buf_ao, size(buf_ao, 1) )
+  ! FQS_SQF_ao = buf_ao x S = F x Q x S
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                   &
+            , buf_ao, size(buf_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+  ! buf_ao = S x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
+            , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, buf_ao, size(buf_ao, 1) )
+  ! FQS_SQF_ao = FQS_SQF_ao - buf_ao x F = F x Q x S - S x Q x F
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0                                        &
+            , buf_ao, size(buf_ao, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+  deallocate(buf_ao)
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+  BEGIN_DOC
+  ! FQS_SQF_ao passed through ao_to_mo_bi_ortho.
+  END_DOC
+  implicit none
+  call ao_to_mo_bi_ortho(FQS_SQF_ao, size(FQS_SQF_ao, 1), FQS_SQF_mo, size(FQS_SQF_mo, 1))
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index 6b1c1d77..b9611836 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -141,27 +141,49 @@ END_PROVIDER
  BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit]
- implicit none
+
+  implicit none
   integer :: i, k
-  grad_non_hermit_left = 0.d0
+
+  grad_non_hermit_left  = 0.d0
   grad_non_hermit_right = 0.d0
+
   do i = 1, elec_beta_num ! doc --> SOMO
     do k = elec_beta_num+1, elec_alpha_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
     enddo
   enddo
+
   do i = 1, elec_beta_num ! doc --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
     enddo
   enddo
+
   do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
     enddo
   enddo
- grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
+  grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
 END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+  BEGIN_DOC
+  ! Fock_matrix_tc_mo_tot passed through mo_to_ao_bi_ortho.
+  END_DOC
+  implicit none
+  call mo_to_ao_bi_ortho(Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1))
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/rh_tcscf.irp.f b/src/tc_scf/rh_tcscf.irp.f
new file mode 100644
index 00000000..dc7a34fc
--- /dev/null
+++ b/src/tc_scf/rh_tcscf.irp.f
@@ -0,0 +1,367 @@
+! ---
+
+subroutine rh_tcscf()
+
+  BEGIN_DOC
+  !
+  ! Roothaan-Hall algorithm for TC-SCF calculation, with optional DIIS extrapolation
+  ! of the AO Fock matrix and a level shift that is raised while grad_non_hermit grows
+  END_DOC
+
+  implicit none
+
+  integer                       :: i, j
+  integer                       :: iteration_TCSCF, dim_DIIS, index_dim_DIIS
+  double precision              :: energy_TCSCF, energy_TCSCF_1e, energy_TCSCF_2e, energy_TCSCF_3e, gradie_TCSCF
+  double precision              :: energy_TCSCF_previous, delta_energy_TCSCF
+  double precision              :: gradie_TCSCF_previous, delta_gradie_TCSCF
+  double precision              :: max_error_DIIS_TCSCF
+  double precision              :: level_shift_TCSCF_save
+  double precision, allocatable :: F_DIIS(:,:,:), e_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  ! level_shift_TCSCF is read and rescaled inside the loop below
+  !PROVIDE ao_md5 mo_occ
+  PROVIDE level_shift_TCSCF
+  ! DIIS history (Fock and error matrices) and backup copies of the left/right MO coefficients
+  allocate( mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num) &
+          , F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), e_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF) )
+
+  F_DIIS         = 0.d0
+  e_DIIS         = 0.d0
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+
+  call write_time(6)
+
+  ! ---
+  ! Initialize energies, gradient, convergence measures and iteration counters
+
+  energy_TCSCF_previous = TC_HF_energy
+  energy_TCSCF_1e       = TC_HF_one_e_energy
+  energy_TCSCF_2e       = TC_HF_two_e_energy
+  energy_TCSCF_3e       = 0.d0
+  if(three_body_h_tc) then
+    energy_TCSCF_3e     = diag_three_elem_hf
+  endif
+  gradie_TCSCF_previous = grad_non_hermit
+  delta_energy_TCSCF    = 1.d0
+  delta_gradie_TCSCF    = 1.d0
+  iteration_TCSCF       = 0
+  dim_DIIS              = 0
+  max_error_DIIS_TCSCF  = 1.d0
+
+  ! ---
+
+  ! Start of main SCF loop
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+  ! loop until the DIIS error, the energy change and the gradient change are all below their thresholds
+  do while( (max_error_DIIS_TCSCF > threshold_DIIS_nonzero_TCSCF) .or. &
+            (dabs(delta_energy_TCSCF) > thresh_TCSCF)             .or. &
+            (dabs(delta_gradie_TCSCF) > dsqrt(thresh_TCSCF))           )
+
+    iteration_TCSCF += 1
+    if(iteration_TCSCF > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      exit
+    endif
+
+    ! TODO
+    !if(frozen_orb_scf) then
+    !  call initialize_mo_coef_begin_iteration
+    !endif
+
+    ! current size of the DIIS space
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! ---
+    ! the DIIS step is skipped once |delta E| <= 1.d-6
+    if((tcscf_algorithm == 'DIIS') .and. (dabs(delta_energy_TCSCF) > 1.d-6))  then
+
+      ! store Fock and error matrices at each iteration
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          e_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao(i,j)
+        enddo
+      enddo
+
+      ! Compute the extrapolated Fock matrix
+      call extrapolate_TC_Fock_matrix( e_DIIS, F_DIIS                   &
+                                     , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                                     , iteration_TCSCF, dim_DIIS )
+      ! alpha and beta each get half of the extrapolated total; TOUCH flags them as modified for IRPF90
+      Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
+      Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
+      TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
+
+    endif
+
+    ! ---
+    ! replace the MOs by fock_tc_leigvec_ao / fock_tc_reigvec_ao (presumably the left/right Fock eigenvectors)
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+
+    ! TODO
+    !if(frozen_orb_scf) then
+    ! call reorder_core_orb
+    ! call initialize_mo_coef_begin_iteration
+    !endif
+
+    ! largest element (in absolute value) of the FQS_SQF_mo error matrix
+    max_error_DIIS_TCSCF = maxval(abs(FQS_SQF_mo))
+
+    energy_TCSCF       = TC_HF_energy
+    energy_TCSCF_1e    = TC_HF_one_e_energy
+    energy_TCSCF_2e    = TC_HF_two_e_energy
+    energy_TCSCF_3e    = 0.d0
+    if(three_body_h_tc) then
+      energy_TCSCF_3e  = diag_three_elem_hf
+    endif
+    gradie_TCSCF       = grad_non_hermit
+    delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous
+    delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous
+    ! gradient went up: reload the Fock matrix last stored in F_DIIS (index_dim_DIIS is only set in the DIIS branch above)
+    if((TCSCF_algorithm == 'DIIS') .and. (delta_gradie_TCSCF > 0.d0)) then
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
+      Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
+      TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
+    endif
+
+    ! ---
+    ! remember the shift (for the bail-out test) and the MOs (restored at the start of every trial)
+    level_shift_TCSCF_save = level_shift_TCSCF
+
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    ! while the gradient increases, retry the diagonalization with a larger level shift
+    do while(delta_gradie_TCSCF > 0.d0)
+
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num)
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num)
+      ! a shift <= 0.1 jumps to 1.0, otherwise it is tripled
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_r_coef mo_l_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+
+      !if(frozen_orb_scf) then
+      !  call reorder_core_orb
+      !  call initialize_mo_coef_begin_iteration
+      !endif
+      TOUCH mo_l_coef mo_r_coef
+
+      energy_TCSCF       = TC_HF_energy
+      energy_TCSCF_1e    = TC_HF_one_e_energy
+      energy_TCSCF_2e    = TC_HF_two_e_energy
+      energy_TCSCF_3e    = 0.d0
+      if(three_body_h_tc) then
+        energy_TCSCF_3e  = diag_three_elem_hf
+      endif
+      gradie_TCSCF       = grad_non_hermit
+      delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous
+      delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous
+      ! bail out once the shift exceeds its starting value by more than 40; it is then set to 4x the starting value
+      if(level_shift_TCSCF - level_shift_TCSCF_save > 40.d0) then
+        level_shift_TCSCF = level_shift_TCSCF_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+      ! each completed trial resets the DIIS dimension
+      dim_DIIS = 0
+    enddo
+
+    ! ---
+    ! halve the level shift before the next iteration
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    energy_TCSCF_previous = energy_TCSCF
+    energy_TCSCF_1e       = TC_HF_one_e_energy
+    energy_TCSCF_2e       = TC_HF_two_e_energy
+    energy_TCSCF_3e       = 0.d0
+    if(three_body_h_tc) then
+      energy_TCSCF_3e     = diag_three_elem_hf
+    endif
+    gradie_TCSCF_previous = grad_non_hermit
+    ! NOTE(review): the value printed as |delta TC energy| is the signed difference, not its absolute value
+    print *, ' iteration         = ', iteration_TCSCF
+    print *, ' total TC energy   = ', energy_TCSCF 
+    print *, ' 1-e   TC energy   = ', energy_TCSCF_1e
+    print *, ' 2-e   TC energy   = ', energy_TCSCF_2e
+    print *, ' 3-e   TC energy   = ', energy_TCSCF_3e
+    print *, ' |delta TC energy| = ', delta_energy_TCSCF
+    print *, ' delta TC gradient = ', delta_gradie_TCSCF
+    print *, ' max TC DIIS error = ', max_error_DIIS_TCSCF 
+    print *, ' TC DIIS dim       = ', dim_DIIS
+    print *, ' TC level shift    = ', level_shift_TCSCF
+    ! MOs and energy are passed to the ezfio_set_* routines only if the gradient decreased
+    if(delta_gradie_TCSCF < 0.d0) then
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      call ezfio_set_tc_scf_bitc_energy(energy_TCSCF)
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+  ! ---
+
+  !if(iteration_TCSCF < n_it_TCSCF_max) then
+  !  mo_label = 'Canonical'
+  !endif
+
+  !if(.not.frozen_orb_scf) then
+  ! call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo, size(Fock_matrix_mo,1), size(Fock_matrix_mo, 2), mo_label, 1, .true.)
+  ! call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+  ! call orthonormalize_mos
+  ! call save_mos
+  !endif
+  !call write_double(6, energy_TCSCF, 'TCSCF energy')
+
+  call write_time(6)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(e_DIIS, F_DIIS, F_ao, size_F_ao, iteration_TCSCF, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  !
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1
+  ! ==> Lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i - 1)
+  ! If B cannot be factorized or rcond < 1.d-14, dim_DIIS is set to 0 and F_ao is returned untouched
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: iteration_TCSCF, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: e_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork
+  double precision                :: rcond, ferr, berr
+  integer,          allocatable   :: iwork(:)
+  double precision, allocatable   :: scratch(:,:)
+  ! nothing to extrapolate from
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1), scratch(ao_num,ao_num) )
+
+  ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(iteration_TCSCF-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(iteration_TCSCF-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + e_DIIS(k,l,i_DIIS) * e_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B matrix and build the X matrix
+  ! last row/column of B = -1 (corner element stays 0), right-hand side C = (0,...,0,-1)
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+  ! NOTE(review): scratch(ao_num,ao_num) is released here without having been used
+  deallocate(scratch)
+
+  ! Estimate condition number of B
+  integer                       :: info
+  double precision              :: anorm
+  integer,          allocatable :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+  ! 1-norm of B, passed to dgecon below
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+  ! LU-factorize a copy (AF) so that B is still intact for dgesv
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+  ! reciprocal condition number (1-norm) from the LU factors
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+  ! NOTE(review): only info < 0 is treated as an error below; info > 0 from dgesv is not checked
+  deallocate(scratch, AF, iwork)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+  ! F_ao = sum_k X(k) * F_DIIS(:,:,dim_DIIS-k+1), skipping coefficients below 1.d-10 in magnitude
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
+! ---
+
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 48cbbdc0..2b751e50 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -18,11 +18,16 @@ program tc_scf
   !call create_guess
   !call orthonormalize_mos
 
-  call routine_scf()
+  PROVIDE tcscf_algorithm
+  if(tcscf_algorithm == 'DIIS') then
+    call rh_tcscf()
+  else
+    call simple_tcscf()
+  endif
+
   call minimize_tc_orb_angles()
   call print_energy_and_mos()
 
-
 end
 
 ! ---
@@ -64,7 +69,7 @@ end subroutine create_guess
 
 ! ---
 
-subroutine routine_scf()
+subroutine simple_tcscf()
 
   implicit none
   integer                       :: i, j, it
@@ -79,9 +84,9 @@ subroutine routine_scf()
   !print*,'grad_hermit = ', grad_hermit
   print*,'***'
   print*,'TC HF total energy = ', TC_HF_energy
-  print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
+  print*,'TC HF 1 e   energy = ', TC_HF_one_e_energy
   print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-  if(three_body_h_tc)then
+  if(three_body_h_tc) then
    print*,'TC HF 3 body       = ', diag_three_elem_hf
   endif
   print*,'***'
@@ -99,7 +104,6 @@ subroutine routine_scf()
    call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
    TOUCH mo_l_coef mo_r_coef
 
-
   else
 
    print*,'grad_hermit = ',grad_hermit
@@ -122,7 +126,7 @@ subroutine routine_scf()
       print*,'iteration = ', it
       print*,'***'
       print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
+      print*,'TC HF 1 e   energy = ', TC_HF_one_e_energy
       print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy
       if(three_body_h_tc)then
        print*,'TC HF 3 body       = ', diag_three_elem_hf
@@ -161,7 +165,7 @@ subroutine routine_scf()
       print*,'iteration = ', it
       print*,'***'
       print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
+      print*,'TC HF 1 e   energy = ', TC_HF_one_e_energy
       print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
       print*,'TC HF 3 body       = ', diag_three_elem_hf
       print*,'***'
@@ -174,11 +178,11 @@ subroutine routine_scf()
   endif
 
   print*,'Energy converged !'
-  call print_energy_and_mos
+  call print_energy_and_mos()
 
   deallocate(rho_old, rho_new)
 
-end subroutine routine_scf
+end subroutine simple_tcscf
 
 ! ---
 
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
index f6ae3e1f..1f054a30 100644
--- a/src/tc_scf/tc_scf_dm.irp.f
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -1,25 +1,31 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_beta  = TCSCF_bi_ort_dm_ao_beta
- else
-  TCSCF_density_matrix_ao_beta  = SCF_density_matrix_ao_beta
- endif
+  implicit none
+  if(bi_ortho) then
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_alpha  = TCSCF_bi_ort_dm_ao_alpha
- else
-  TCSCF_density_matrix_ao_alpha  = SCF_density_matrix_ao_alpha
- endif
+  implicit none
+  if(bi_ortho)then
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
 END_PROVIDER 
 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
- implicit none
-  TCSCF_density_matrix_ao_tot  = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+  implicit none
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
 END_PROVIDER 
 
 
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
index aa2a16ff..c60ce761 100644
--- a/src/tc_scf/tc_scf_energy.irp.f
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -1,6 +1,6 @@
 
  BEGIN_PROVIDER [ double precision, TC_HF_energy]
-&BEGIN_PROVIDER [ double precision, TC_HF_one_electron_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
 &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
 
   BEGIN_DOC
@@ -11,19 +11,19 @@
   integer :: i, j
 
   TC_HF_energy = nuclear_repulsion
-  TC_HF_one_electron_energy = 0.d0
+  TC_HF_one_e_energy = 0.d0
   TC_HF_two_e_energy = 0.d0
 
   do j = 1, ao_num
     do i = 1, ao_num
       TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
-                                    + two_e_tc_non_hermit_integral_beta(i,j)  * TCSCF_density_matrix_ao_beta(i,j) )
-      TC_HF_one_electron_energy += ao_one_e_integrals_tc_tot(i,j) &
-                                       * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
     enddo
   enddo
 
-  TC_HF_energy += TC_HF_one_electron_energy + TC_HF_two_e_energy
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
   TC_HF_energy += diag_three_elem_hf
 
 END_PROVIDER
diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f
index 09a4a1b9..dde477c4 100644
--- a/src/tc_scf/tc_scf_utils.irp.f
+++ b/src/tc_scf/tc_scf_utils.irp.f
@@ -40,3 +40,4 @@ subroutine LTxSxR(n, m, L, S, R, C)
 end subroutine LTxR
 
 ! ---
+

From 06a2f32b1d04cfb6faee94d4d0ad7898fa4fe437 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 8 Dec 2022 00:35:40 +0100
Subject: [PATCH 04/68] fixed bugs + cleaning in TC-SCF DIIS

---
 src/bi_ort_ints/semi_num_ints_mo.irp.f        |  45 +++--
 src/bi_ortho_mos/mos_rl.irp.f                 |   4 +-
 src/non_hermit_dav/biorthog.irp.f             |  48 ++---
 .../lapack_diag_non_hermit.irp.f              | 174 +++++++++---------
 src/scf_utils/roothaan_hall_scf.irp.f         |  16 +-
 src/tc_keywords/EZFIO.cfg                     |  10 +-
 src/tc_scf/diago_bi_ort_tcfock.irp.f          |  53 +++++-
 src/tc_scf/diis_tcscf.irp.f                   |   7 +-
 src/tc_scf/fock_tc.irp.f                      |  71 ++++---
 src/tc_scf/fock_three.irp.f                   | 102 +++++-----
 src/tc_scf/rh_tcscf.irp.f                     | 171 +++++++----------
 src/tc_scf/routines_rotates.irp.f             |   2 +-
 src/tc_scf/tc_scf.irp.f                       |  91 +++++----
 13 files changed, 426 insertions(+), 368 deletions(-)

diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 33f512cf..4762c25e 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -122,35 +122,40 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
 
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
 
   implicit none
   integer :: ipoint
+  double precision :: wall0, wall1
 
- print*,'providing int2_grad1_u12_bimo_transp'
- double precision :: wall0, wall1
- call wall_time(wall0)
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
- !$OMP DO SCHEDULE (dynamic)
-  do ipoint = 1, n_points_final_grid
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
- call wall_time(wall1)
- print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+
+  call wall_time(wall0)
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ipoint) & 
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
  implicit none
  integer          :: i, j, ipoint
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
index 9e3ed358..d51999fc 100644
--- a/src/bi_ortho_mos/mos_rl.irp.f
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -61,7 +61,7 @@ subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
             , 0.d0, tmp_1, size(tmp_1, 1) )
 
   ! (ao_overlap x mo_r_coef) x A_mo
-  allocate( tmp_1(ao_num,mo_num) )
+  allocate( tmp_2(ao_num,mo_num) )
   call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
             , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
             , 0.d0, tmp_2, size(tmp_2, 1) )
@@ -73,7 +73,7 @@ subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
             , 0.d0, tmp_1, size(tmp_1, 1) )
 
   ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
-  call dgemm( 'N', 'T', ao_num, mo_num, mo_num, 1.d0       &
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
             , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
             , 0.d0, A_ao, LDA_ao )
   
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
index 926a20f1..89e5b4f4 100644
--- a/src/non_hermit_dav/biorthog.irp.f
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -283,16 +283,16 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   ! -------------------------------------------------------------------------------------
   !
 
-  print *, ' '
-  print *, ' Computing the left/right eigenvectors ...'
-  print *, ' '
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
 
-  allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) 
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
   
-  print *, ' fock matrix'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') A(i,:)
-  enddo
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
 
   !thr_cut = 1.d-15
   !call cancel_small_elmts(A, n, thr_cut)
@@ -301,11 +301,11 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
   !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
 
-  print *, ' '
-  print *, ' eigenvalues'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') WR(i), WI(i)
-  enddo
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
   !print *, ' right eigenvect bef' 
   !do i = 1, n
   !  write(*, '(1000(F16.10,X))') VR(:,i)
@@ -328,9 +328,10 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   !                  track & sort the real eigenvalues 
 
   n_good = 0
-  thr    = 1.d-3
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
   do i = 1, n
-    print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
     if(dabs(WI(i)) .lt. thr) then
       n_good += 1
     else
@@ -402,23 +403,24 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
 
-    print *, ' lapack vectors are normalized and bi-orthogonalized'
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
     deallocate(S)
     return
 
-  elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
 
-    print *, ' lapack vectors are not normalized but bi-orthogonalized'
-    call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
 
-    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
 
-    deallocate(S)
-    return
+  !  deallocate(S)
+  !  return
 
   else
 
-    print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
 
     ! ---
 
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
index 53c62ce8..0d652af4 100644
--- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -930,7 +930,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -973,7 +973,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize weighted vectors'
+  !print *, ' apply SVD to orthogonalize & normalize weighted vectors'
 
   ! ---
 
@@ -1097,10 +1097,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1160,10 +1160,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize vectors'
+  !print *, ' apply SVD to orthogonalize & normalize vectors'
 
   ! ---
 
@@ -1196,10 +1196,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1224,6 +1224,7 @@ subroutine impose_orthog_svd(n, m, C)
   if(num_linear_dependencies > 0) then
     write(*,*) ' linear dependencies = ', num_linear_dependencies
     write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
     stop
   endif
 
@@ -1256,10 +1257,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1296,10 +1297,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1358,10 +1359,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
   deallocate(S)
 
 end subroutine impose_orthog_svd_overlap
@@ -1528,11 +1529,11 @@ subroutine impose_orthog_degen_eigvec(n, e0, C0)
   enddo
 
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -1677,7 +1678,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
   double precision                :: accu_d, accu_nd, s_tmp
   double precision, allocatable   :: S(:,:)
 
-  print *, ' check bi-orthonormality'
+  !print *, ' check bi-orthonormality'
 
   ! ---
 
@@ -1714,15 +1715,19 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print*, '    diag acc: ', accu_d
-  print*, ' nondiag acc: ', accu_nd
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
 
   ! ---
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
 
     do i = 1, m
-      print *, i, S(i,i)
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
       if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
         s_tmp = 1.d0 / dsqrt(S(i,i))
         do j = 1, n
@@ -1757,8 +1762,8 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print *, '    diag acc: ', accu_d
-  print *, ' nondiag acc: ', accu_nd
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
 
   deallocate(S)
 
@@ -1801,10 +1806,10 @@ subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1852,17 +1857,18 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   integer                       :: i, j
   double precision, allocatable :: SS(:,:)
 
-  print *, ' check bi-orthogonality'
+  !print *, ' check bi-orthogonality'
 
   ! ---
 
   call dgemm( 'T', 'N', m, m, n, 1.d0          &
             , Vl, size(Vl, 1), Vr, size(Vr, 1) &
             , 0.d0, S, size(S, 1) )
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1877,12 +1883,12 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
 
-  print *, ' accu_nd = ', accu_nd
-  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+  !print *, ' accu_nd = ', accu_nd
+  !print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
 
   ! ---
 
-  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+  if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then
     print *, ' non bi-orthogonal vectors !'
     print *, ' accu_nd = ', accu_nd
     print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
@@ -1912,12 +1918,12 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
             , V, size(V, 1), V, size(V, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ''
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
-  print *, ''
+  !print *, ''
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+  !print *, ''
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1981,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i), e0(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2181,11 +2187,11 @@ subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0,
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2414,10 +2420,10 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2489,10 +2495,11 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   ! ---
@@ -2806,10 +2813,10 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F25.16,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2886,10 +2893,11 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap aft SVD with overlap: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   return
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 3b9eaeb4..56a1ed8e 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -29,11 +29,11 @@ END_DOC
 
   call write_time(6)
 
-  print*,'Energy of the guess = ',SCF_energy
+  print*,'energy of the guess = ',SCF_energy
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
-    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
 
@@ -69,9 +69,9 @@ END_DOC
     if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
 
       ! Store Fock and error matrices at each iteration
+      index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
       do j=1,ao_num
         do i=1,ao_num
-          index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
           Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j)
           error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j)
         enddo
@@ -106,8 +106,8 @@ END_DOC
 !   SCF energy
 
     energy_SCF = SCF_energy
-    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
-    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then
       Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
       Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
       Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
@@ -131,15 +131,17 @@ END_DOC
         call initialize_mo_coef_begin_iteration
       endif
       TOUCH mo_coef
-      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
       energy_SCF = SCF_energy
       if (level_shift-level_shift_save > 40.d0) then
         level_shift = level_shift_save * 4.d0
         SOFT_TOUCH level_shift
         exit
       endif
+
       dim_DIIS=0
     enddo
+
     level_shift = level_shift * 0.5d0
     SOFT_TOUCH level_shift
     energy_SCF_previous = energy_SCF
@@ -175,7 +177,7 @@ END_DOC
    call save_mos
   endif
 
-  call write_double(6, Energy_SCF, 'SCF energy')
+  call write_double(6, energy_SCF, 'SCF energy')
 
   call write_time(6)
 
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index 8db73f9a..9df73154 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -86,13 +86,13 @@ default: False
 type: Threshold
 doc: Threshold on the convergence of the Hartree Fock energy.
 interface: ezfio,provider,ocaml
-default: 1.e-10
+default: 1.e-12
 
 [n_it_tcscf_max]
 type: Strictly_positive_int
 doc: Maximum number of SCF iterations
 interface: ezfio,provider,ocaml
-default: 500
+default: 100
 
 [selection_tc]
 type: integer
@@ -160,3 +160,9 @@ doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
 interface: ezfio,provider,ocaml
 default: DIIS
 
+[im_thresh_tcscf]
+type: Threshold
+doc: Threshold on the imaginary part of the energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
index 29ca0efe..9c571f8a 100644
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -76,6 +76,8 @@
             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
             , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
 
+  ! ---
+
   accu_d  = 0.d0
   accu_nd = 0.d0
   do i = 1, mo_num
@@ -92,22 +94,24 @@
       endif
     enddo 
   enddo
-  accu_nd = dsqrt(accu_nd)/accu_d
-
+  accu_nd = dsqrt(accu_nd) / accu_d
   if(accu_nd .gt. thr_nd) then
     print *, ' bi-orthog failed'
-    print*,'accu_nd MO = ', accu_nd, thr_nd
-    print*,'overlap_fock_tc_eigvec_mo = '
+    print *, ' accu_nd MO = ', accu_nd, thr_nd
+    print *, ' overlap_fock_tc_eigvec_mo = '
     do i = 1, mo_num
       write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
     enddo
-   stop
+    stop
   endif
 
-  if( dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d ) then
-    print *, 'mo_num     = ', mo_num 
-    print *, 'accu_d  MO = ', accu_d, thr_d
-    print *, 'normalizing vectors ...'
+  ! ---
+
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thr_d
+    print *, ' normalizing vectors ...'
     do i = 1, mo_num
       norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
       if(norm .gt. thr_d) then
@@ -117,12 +121,43 @@
         enddo
       endif
     enddo
+
     call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
               , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
               , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
               , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d
+    if(accu_nd .gt. thr_nd) then
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thr_nd
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
   endif
  
+  ! ---
+
 END_PROVIDER 
 
 ! ---
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
index cf339175..ff1077f5 100644
--- a/src/tc_scf/diis_tcscf.irp.f
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -27,6 +27,7 @@ BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
 
   implicit none
 
+  Q_alpha = 0.d0
   call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
             , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
             , 0.d0, Q_alpha, size(Q_alpha, 1) )
@@ -47,6 +48,7 @@ BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
 
   implicit none
 
+  Q_beta = 0.d0
   call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
             , 0.d0, Q_beta, size(Q_beta, 1) )
@@ -113,15 +115,18 @@ BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
             , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
 
   ! S x Q
+  tmp = 0.d0
   call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
             , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
             , 0.d0, tmp, size(tmp, 1) )
 
-  ! F x P x S - S x P x F
+  ! F x Q x S - S x Q x F
   call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0                                  &
             , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
             , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
 
+  deallocate(tmp)
+
 END_PROVIDER
 
 ! ---
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index b9611836..c3642a7e 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -74,68 +74,61 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
                          + two_e_tc_non_hermit_integral_beta 
 
 END_PROVIDER 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
-!  END_DOC
-!  Fock_matrix_tc_ao_tot = 0.5d0 * (Fock_matrix_tc_ao_alpha + Fock_matrix_tc_ao_beta)
-!END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total alpha TC Fock matrix in the MO basis: h_c + two-e TC terms (+ 3-e term if bi_ortho and three_body_h_tc)
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
-                         , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
-   endif
+
+  implicit none
+
+  if(bi_ortho) then
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+    if(three_body_h_tc) then
+      Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                  , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
   endif
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total beta TC Fock matrix in the MO basis: h_c + two-e TC terms (+ 3-e term if bi_ortho and three_body_h_tc)
   END_DOC
-  if(bi_ortho)then
+
+  implicit none
+
+  if(bi_ortho) then
+
    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
-   endif
+
+    if(three_body_h_tc) then
+      Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                  , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
   endif
+
 END_PROVIDER
 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
-!  END_DOC
-!  Fock_matrix_tc_mo_tot = 0.5d0 * (Fock_matrix_tc_mo_alpha + Fock_matrix_tc_mo_beta)
-!  if(three_body_h_tc) then
-!    Fock_matrix_tc_mo_tot += fock_3_mat
-!  endif
-!  !call restore_symmetry(mo_num, mo_num, Fock_matrix_tc_mo_tot, mo_num, 1.d-10)
-!END_PROVIDER 
-
 ! ---
 
  BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index f73a5049..35b6aac6 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -70,52 +70,72 @@ subroutine give_fock_ia_three_e_total(i,a,contrib)
 
 end
 
+! ---
+
 BEGIN_PROVIDER [double precision, diag_three_elem_hf]
- implicit none
- integer :: i,j,k,ipoint,mm
- double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231
- print*,'providing diag_three_elem_hf'
- if(.not.three_body_h_tc)then
-  diag_three_elem_hf = 0.d0
- else
-  if(.not.bi_ortho)then
-   one_third = 1.d0/3.d0
-   two_third = 2.d0/3.d0
-   four_third = 4.d0/3.d0
-   diag_three_elem_hf = 0.d0
-   do i = 1, elec_beta_num
-    do j = 1, elec_beta_num
-     do k = 1, elec_beta_num
-      call  give_integrals_3_body(k,j,i,j,i,k,exchange_int_231)   
-      diag_three_elem_hf += two_third * exchange_int_231
-     enddo
-    enddo
-   enddo
-   do mm = 1, 3
-    do ipoint = 1, n_points_final_grid
-     weight = final_weight_at_r_vector(ipoint)                                                                          
-     contrib   = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm)  & 
-                -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                                 & 
-                -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
-     contrib  *= four_third
-     contrib  += -two_third  * fock_3_rho_beta(ipoint)     * fock_3_w_kl_w_kl(ipoint,mm) & 
-                - four_third * fock_3_w_kk_sum(ipoint,mm)  * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
-     diag_three_elem_hf += weight * contrib
-    enddo
-   enddo
-   diag_three_elem_hf = - diag_three_elem_hf
+
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
   else
-   double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb
-   provide mo_l_coef mo_r_coef
-   call give_aaa_contrib(integral_aaa)
-   call give_aab_contrib(integral_aab)
-   call give_abb_contrib(integral_abb)
-   call give_bbb_contrib(integral_bbb)
-   diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+
+    if(.not. bi_ortho) then
+
+      ! ---
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+
+    endif
+
   endif
- endif
+
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
  implicit none 
diff --git a/src/tc_scf/rh_tcscf.irp.f b/src/tc_scf/rh_tcscf.irp.f
index dc7a34fc..597c3e67 100644
--- a/src/tc_scf/rh_tcscf.irp.f
+++ b/src/tc_scf/rh_tcscf.irp.f
@@ -16,7 +16,8 @@ subroutine rh_tcscf()
   double precision              :: energy_TCSCF_previous, delta_energy_TCSCF
   double precision              :: gradie_TCSCF_previous, delta_gradie_TCSCF
   double precision              :: max_error_DIIS_TCSCF
-  double precision              :: level_shift_TCSCF_save
+  double precision              :: level_shift_save
+  double precision              :: delta_energy_tmp, delta_gradie_tmp
   double precision, allocatable :: F_DIIS(:,:,:), e_DIIS(:,:,:)
   double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
 
@@ -60,8 +61,8 @@ subroutine rh_tcscf()
   PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
 
   do while( (max_error_DIIS_TCSCF > threshold_DIIS_nonzero_TCSCF) .or. &
-            (dabs(delta_energy_TCSCF) > thresh_TCSCF)             .or. &
-            (dabs(delta_gradie_TCSCF) > dsqrt(thresh_TCSCF))           )
+            !(dabs(delta_energy_TCSCF) > thresh_TCSCF)             .or. &
+            (dabs(gradie_TCSCF_previous) > dsqrt(thresh_TCSCF))        )
 
     iteration_TCSCF += 1
     if(iteration_TCSCF > n_it_TCSCF_max) then
@@ -69,11 +70,6 @@ subroutine rh_tcscf()
       exit
     endif
 
-    ! TODO
-    !if(frozen_orb_scf) then
-    !  call initialize_mo_coef_begin_iteration
-    !endif
-
     ! current size of the DIIS space
     dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
 
@@ -91,13 +87,19 @@ subroutine rh_tcscf()
       enddo
 
       ! Compute the extrapolated Fock matrix
-      call extrapolate_TC_Fock_matrix( e_DIIS, F_DIIS                   &
+      call extrapolate_TC_Fock_matrix( e_DIIS, F_DIIS                                        &
                                      , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
                                      , iteration_TCSCF, dim_DIIS )
 
       Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
       Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
-      TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
+      !TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                            , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta , size(Fock_matrix_tc_ao_beta , 1) &
+                            , Fock_matrix_tc_mo_beta , size(Fock_matrix_tc_mo_beta , 1) )
+      TOUCH Fock_matrix_tc_mo_alpha Fock_matrix_tc_mo_beta
 
     endif
 
@@ -109,15 +111,54 @@ subroutine rh_tcscf()
 
     ! ---
 
-    ! TODO
-    !if(frozen_orb_scf) then
-    ! call reorder_core_orb
-    ! call initialize_mo_coef_begin_iteration
-    !endif
-
     ! calculate error vectors
     max_error_DIIS_TCSCF = maxval(abs(FQS_SQF_mo))
 
+    ! ---
+
+    delta_energy_tmp = TC_HF_energy    - energy_TCSCF_previous
+    delta_gradie_tmp = grad_non_hermit - gradie_TCSCF_previous
+
+    ! ---
+
+    do while((dabs(delta_energy_tmp) > 0.1d0) .and. (iteration_TCSCF > 1))
+!      print *, ' very big step  : ', delta_energy_tmp
+!      print *, ' TC level shift = ', level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      TOUCH mo_l_coef mo_r_coef
+
+      delta_energy_tmp = TC_HF_energy - energy_TCSCF_previous
+
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+
+      dim_DIIS = 0
+    enddo
+!    print *, ' very big step  : ', delta_energy_tmp
+!    print *, ' TC level shift = ', level_shift_TCSCF
+
+    ! ---
+
+    level_shift_TCSCF = 0.d0
+    !level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+
+    gradie_TCSCF       = grad_non_hermit
     energy_TCSCF       = TC_HF_energy
     energy_TCSCF_1e    = TC_HF_one_e_energy
     energy_TCSCF_2e    = TC_HF_two_e_energy
@@ -125,78 +166,17 @@ subroutine rh_tcscf()
     if(three_body_h_tc) then
       energy_TCSCF_3e  = diag_three_elem_hf
     endif
-    gradie_TCSCF       = grad_non_hermit
     delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous
     delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous
 
-    if((TCSCF_algorithm == 'DIIS') .and. (delta_gradie_TCSCF > 0.d0)) then
-      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
-      Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
-      Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
-      TOUCH Fock_matrix_tc_ao_alpha Fock_matrix_tc_ao_beta
-    endif
-
-    ! ---
-
-    level_shift_TCSCF_save = level_shift_TCSCF
-
-    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
-    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
-
-    do while(delta_gradie_TCSCF > 0.d0)
-
-      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num)
-      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num)
-
-      if(level_shift_TCSCF <= .1d0) then
-        level_shift_TCSCF = 1.d0
-      else
-        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
-      endif
-      TOUCH mo_r_coef mo_l_coef level_shift_TCSCF
-
-      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
-      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
-
-      !if(frozen_orb_scf) then
-      !  call reorder_core_orb
-      !  call initialize_mo_coef_begin_iteration
-      !endif
-      TOUCH mo_l_coef mo_r_coef
-
-      energy_TCSCF       = TC_HF_energy
-      energy_TCSCF_1e    = TC_HF_one_e_energy
-      energy_TCSCF_2e    = TC_HF_two_e_energy
-      energy_TCSCF_3e    = 0.d0
-      if(three_body_h_tc) then
-        energy_TCSCF_3e  = diag_three_elem_hf
-      endif
-      gradie_TCSCF       = grad_non_hermit
-      delta_energy_TCSCF = energy_TCSCF - energy_TCSCF_previous
-      delta_gradie_TCSCF = gradie_TCSCF - gradie_TCSCF_previous
-
-      if(level_shift_TCSCF - level_shift_TCSCF_save > 40.d0) then
-        level_shift_TCSCF = level_shift_TCSCF_save * 4.d0
-        SOFT_TOUCH level_shift_TCSCF
-        exit
-      endif
-
-      dim_DIIS = 0
-    enddo
-
-    ! ---
-
-    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
-    SOFT_TOUCH level_shift_TCSCF
-
     energy_TCSCF_previous = energy_TCSCF
-    energy_TCSCF_1e       = TC_HF_one_e_energy
-    energy_TCSCF_2e       = TC_HF_two_e_energy
-    energy_TCSCF_3e       = 0.d0
-    if(three_body_h_tc) then
-      energy_TCSCF_3e     = diag_three_elem_hf
-    endif
-    gradie_TCSCF_previous = grad_non_hermit
+    gradie_TCSCF_previous = gradie_TCSCF
+
+
+    level_shift_save = level_shift_TCSCF
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+
 
     print *, ' iteration         = ', iteration_TCSCF
     print *, ' total TC energy   = ', energy_TCSCF 
@@ -204,36 +184,25 @@ subroutine rh_tcscf()
     print *, ' 2-e   TC energy   = ', energy_TCSCF_2e
     print *, ' 3-e   TC energy   = ', energy_TCSCF_3e
     print *, ' |delta TC energy| = ', delta_energy_TCSCF
+    print *, ' TC gradient       = ', gradie_TCSCF
     print *, ' delta TC gradient = ', delta_gradie_TCSCF
     print *, ' max TC DIIS error = ', max_error_DIIS_TCSCF 
     print *, ' TC DIIS dim       = ', dim_DIIS
     print *, ' TC level shift    = ', level_shift_TCSCF
+    print *, ' '
 
-    if(delta_gradie_TCSCF < 0.d0) then
-      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-      call ezfio_set_tc_scf_bitc_energy(energy_TCSCF)
-    endif
+    call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
 
     if(qp_stop()) exit
   enddo
 
   ! ---
 
-  !if(iteration_TCSCF < n_it_TCSCF_max) then
-  !  mo_label = 'Canonical'
-  !endif
-
-  !if(.not.frozen_orb_scf) then
-  ! call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo, size(Fock_matrix_mo,1), size(Fock_matrix_mo, 2), mo_label, 1, .true.)
-  ! call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
-  ! call orthonormalize_mos
-  ! call save_mos
-  !endif
-  !call write_double(6, energy_TCSCF, 'TCSCF energy')
-
   call write_time(6)
 
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, e_DIIS)
+
 end
 
 ! ---
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
index 42925e41..15264768 100644
--- a/src/tc_scf/routines_rotates.irp.f
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -116,7 +116,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
     print *, ' ------------------------------------'
     call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
 
-    print *, ' Overlap lef-right '
+    print *, ' Overlap left-right '
     call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
     do j = 1, n_degen
      write(*,'(100(F8.4,X))') stmp(:,j)
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 2b751e50..283ec2ae 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -15,8 +15,8 @@ program tc_scf
 !  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-  !call create_guess
-  !call orthonormalize_mos
+  call create_guess()
+  call orthonormalize_mos()
 
   PROVIDE tcscf_algorithm
   if(tcscf_algorithm == 'DIIS') then
@@ -42,7 +42,8 @@ subroutine create_guess
   logical :: exists
 
   PROVIDE ezfio_filename
-  call ezfio_has_mo_basis_mo_coef(exists)
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
 
   if (.not.exists) then
     mo_label = 'Guess'
@@ -106,7 +107,7 @@ subroutine simple_tcscf()
 
   else
 
-   print*,'grad_hermit = ',grad_hermit
+   print *, ' grad_hermit = ', grad_hermit
    call save_good_hermit_tc_eigvectors
    TOUCH mo_coef 
    call save_mos
@@ -117,58 +118,70 @@ subroutine simple_tcscf()
 
   if(bi_ortho) then
 
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. dsqrt(thresh_tcscf)) )
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. thresh_tcscf) )
-    !do while( it .lt. n_it_tcscf_max .and. (rho_delta .gt. thresh_tcscf) )
-    do while( it .lt. n_it_tcscf_max .and. (grad_non_hermit_right.gt. dsqrt(thresh_tcscf)) )
+    !do while(e_delta .gt. dsqrt(thresh_tcscf)) )
+    !do while(e_delta .gt. thresh_tcscf) )
+    !do while(rho_delta .gt. thresh_tcscf) )
+    !do while(grad_non_hermit_right .gt. dsqrt(thresh_tcscf))
+    do while(grad_non_hermit .gt. dsqrt(thresh_tcscf))
 
       it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_e_energy
-      print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy
-      if(three_body_h_tc)then
-       print*,'TC HF 3 body       = ', diag_three_elem_hf
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        exit
       endif
-      print*,'***'
-      e_delta = dabs( TC_HF_energy - e_save )
-      print*, 'it, delta E = ', it, e_delta
-      print*, 'it, gradient= ',grad_non_hermit_right
+
+
+      print *, ' ***'
+      print *, ' iteration = ', it
+
+      print *, ' TC HF total energy = ', TC_HF_energy
+      print *, ' TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, ' TC HF 2 non hermit = ', TC_HF_two_e_energy
+      if(three_body_h_tc) then
+        print *, ' TC HF 3 body       = ', diag_three_elem_hf
+      endif
+      e_delta = dabs(TC_HF_energy - e_save)
+
+      print *, ' delta E  = ', e_delta
+      print *, ' gradient = ', grad_non_hermit
+      !print *, ' gradient= ', grad_non_hermit_right
+
+      !rho_new   = TCSCF_bi_ort_dm_ao
+      !!print*, rho_new
+      !rho_delta = 0.d0
+      !do i = 1, ao_num 
+      !  do j = 1, ao_num 
+      !    rho_delta += dabs(rho_new(j,i) - rho_old(j,i))
+      !  enddo
+      !enddo
+      !print *, ' rho_delta =', rho_delta
+      !rho_old = rho_new
+
       e_save    = TC_HF_energy
       mo_l_coef = fock_tc_leigvec_ao
       mo_r_coef = fock_tc_reigvec_ao
-
-      rho_new   = TCSCF_bi_ort_dm_ao
-      !print*, rho_new
-      rho_delta = 0.d0
-      do i = 1, ao_num 
-        do j = 1, ao_num 
-          rho_delta += dabs(rho_new(j,i) - rho_old(j,i))
-        enddo
-      enddo
-      print*, ' rho_delta =', rho_delta
-      rho_old = rho_new
-
       call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
       call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
       TOUCH mo_l_coef mo_r_coef
-
       call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
 
+      print *, ' ***'
+      print *, ''
+
     enddo
 
   else
    do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max )
       print*,'grad_hermit = ',grad_hermit
       it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_e_energy
-      print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-      print*,'TC HF 3 body       = ', diag_three_elem_hf
-      print*,'***'
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
       call save_good_hermit_tc_eigvectors
       TOUCH mo_coef 
       call save_mos

From 7ce04cdd6a1a058ad022f023e8661cc4c8393a7f Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 8 Dec 2022 17:44:40 +0100
Subject: [PATCH 05/68] minor modifs

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f | 338 +++++++++++++-------
 src/ao_many_one_e_ints/listj1b_sorted.irp.f |  95 +++---
 src/tc_scf/test_int.irp.f                   | 127 +++++---
 3 files changed, 357 insertions(+), 203 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 65966c81..9bb16475 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -7,121 +7,6 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
   !
   END_DOC
 
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
-  double precision              :: coef, beta, B_center(3), dist
-  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
-  double precision              :: wall0, wall1
-  double precision, external    :: NAI_pol_mult_erf_ao_with1s
-  double precision :: j12_mu_r12
-  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
-
-  provide mu_erf final_grid_points j1b_pen ao_overlap_abs
-  call wall_time(wall0)
-
-
-  int2_u_grad1u_j1b2_test = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          sigma_ij, beta_ij, factor_ij_1s,center_ij_1s, dist_ij_ipoint,     &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, &
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       &
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2_test)
- !$OMP DO
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
-        dist_ij_ipoint = ao_prod_dist_grid(j,i,ipoint) ! distance to the grid point for the distribution |chi_i(r)chi_j(r)|
-        sigma_ij = ao_prod_sigma(j,i)                  ! typical spatial extension of the distribution |chi_i(r)chi_j(r)|
-        r(1) = final_grid_points(1,ipoint)
-        r(2) = final_grid_points(2,ipoint)
-        r(3) = final_grid_points(3,ipoint)
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
-
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-!          if(beta.gt.1.d3)cycle
-          if(dabs(coef).lt.1.d-10)cycle
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
-          sigma_ij = 1.d0/sigma_ij
-          sigma_ij *= sigma_ij
-          sigma_ij *= 0.5d0
-          double precision :: beta_ij, factor_ij_1s, center_ij_1s(3)
-!          call gaussian_product(sigma_ij,ao_prod_center(1:3,j,i),beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!          if(factor_ij_1s*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
-!          if(factor_ij_1s*dsqpi_3_2*(beta_ij)**(-3/2)*ao_overlap_abs_grid(j,i).lt.1.d-20)cycle
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-            if(factor_ij_1s*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
-
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
-
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            if(expo_coef_1s .gt. 20.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-08) cycle
-
-            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
-
-            tmp += coef_tmp * int_fit
-          enddo
-        enddo
-
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
-
-END_PROVIDER
-
-! ---
-
-
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
-  !
-  END_DOC
-
   implicit none
   integer                       :: i, j, ipoint, i_1s, i_fit
   double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
@@ -138,7 +23,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n
   call wall_time(wall0)
 
 
-  int2_u_grad1u_j1b2_test_2 = 0.d0
+  int2_u_grad1u_j1b2_test = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
@@ -150,7 +35,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
  !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test_2)
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
  !$OMP DO
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
@@ -198,7 +83,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n
           enddo
         enddo
 
-        int2_u_grad1u_j1b2_test_2(j,i,ipoint) = tmp
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
       enddo
     enddo
   enddo
@@ -208,15 +93,228 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test_2, (ao_num, ao_num, n
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_u_grad1u_j1b2_test_2(j,i,ipoint) = int2_u_grad1u_j1b2_test_2(i,j,ipoint)
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_j1b2_test_2', wall1 - wall0
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
 
 END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),int_j1b,int_gauss,dsqpi_3_2
+  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+ int2_grad1u2_grad2u2_j1b2_test_no_v(:,:,:) = 0.d0
+
+! !$OMP PARALLEL DEFAULT (NONE)                                       &
+!     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+!     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s)                &
+!     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_b3_size_thr,&
+!     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+!     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+!     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+!     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test_no_v, ao_abs_comb_b3_j1b,&
+!     !$OMP          ao_overlap_abs,dsqpi_3_2)
+! !$OMP DO SCHEDULE(dynamic)
+!   do i = 1, ao_num
+!     do j = 1, ao_num
+   do i = 14,14
+     do j = 17,17
+       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+         cycle
+       endif
+  
+!       if(ipoint==1)then
+!        if(i+j.lt.10)then
+!        print*,j,i
+!        endif
+!       endif
+!       do i_1s = 1, List_comb_b3_size_thr(j,i)
+       do i_1s = 1, 1
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+!       if(ipoint==1)then
+!        if(i+j.lt.10)then
+!         print*,coef,beta
+!         print*,B_center
+!        endif
+!       endif
+  
+!         do i_fit = 1, n_max_fit_slat
+         do i_fit = 15,15
+        if(j==17.and.i==14)then
+         print*,i_fit,i_1s
+        endif
+! do ipoint = 1, n_points_final_grid
+ do ipoint = 4,4
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+  
+           expo_fit = expo_gauss_1_erf_x_2(i_fit)
+!           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit)*factor_ij_1s*dabs(int_j1b).lt.1.d-15)cycle
+  
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+!           if(ipoint == 4)then
+!            print*,'ipoint == 4 !!'
+!           endif
+           int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+  
+           int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) += coef_fit * int_gauss 
+  
+         enddo
+        enddo
+       enddo
+     enddo
+   enddo
+
+!   !$OMP END DO
+!   !$OMP END PARALLEL
+
+!  do ipoint = 1, n_points_final_grid
+!    do i = 1, ao_num
+!      do j = 1, i-1
+!        int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint)
+!      enddo
+!    enddo
+!  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_no_v', wall1 - wall0
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+
+ double precision :: int_j1b
+ int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+
+! !$OMP PARALLEL DEFAULT (NONE)                                       &
+!     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+!     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+!     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr,&
+!     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
+!     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+!     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+!     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test,&
+!     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+! do i = 1, ao_num
+!   do j = i, ao_num
+   do i = 14,14
+     do j = 17,17
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+!        if(i+j.lt.10)then
+!        print*,j,i
+!        endif
+
+!      do i_1s = 1, List_comb_b3_size_thr(j,i)
+      do i_1s = 1, 1
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+!        if(i+j.lt.10)then
+!         print*,coef,beta
+!         print*,B_center
+!        endif
+
+!       do i_fit = 1, n_max_fit_slat
+         do i_fit = 15,15
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+        if(j==17.and.i==14)then
+         print*,i_fit,i_1s
+        endif
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+               expo_fit, i, j, int_fit_v, n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+! !$OMP END DO
+ deallocate(int_fit_v)
+! !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 58c77f5c..4ef5db99 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -13,6 +13,7 @@
      coef        = List_all_comb_b2_coef  (i_1s)
      if(dabs(coef).lt.1.d-10)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
+     beta = max(beta,1.d-10)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
      int_j1b = 0.d0
      do ipoint = 1, n_points_final_grid
@@ -92,46 +93,48 @@ END_PROVIDER
  
 END_PROVIDER 
 
+
  BEGIN_PROVIDER [ integer, List_comb_b3_size_thr, (ao_num, ao_num)]
 &BEGIN_PROVIDER [ integer, max_List_comb_b3_size_thr]
  implicit none
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-14
+ thr = 1.d-10
  List_comb_b3_size_thr = 0
  do i = 1, ao_num
-  do j = i, ao_num
+  do j = 1, ao_num
    do i_1s = 1, List_all_comb_b3_size
      coef        = List_all_comb_b3_coef  (i_1s)
-     if(dabs(coef).lt.thr)cycle
      beta        = List_all_comb_b3_expo  (i_1s)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
-     int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
-      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
-      dist += ( center(2) - r(2) )*( center(2) - r(2) )
-      dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
-     enddo
-     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+     if(dabs(coef).lt.thr)cycle
+!     int_j1b = 0.d0
+!     do ipoint = 1, n_points_final_grid
+!      r(1:3) = final_grid_points(1:3,ipoint)
+!      weight = final_weight_at_r_vector(ipoint)
+!      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+!      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+!      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+!      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+!     enddo
+!     if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_b3_size_thr(j,i) += 1
-     endif
+!     endif
    enddo
   enddo 
  enddo
- do i = 1, ao_num
-  do j = 1, i-1
-    List_comb_b3_size_thr(j,i) = List_comb_b3_size_thr(i,j)
-  enddo
- enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_b3_size_thr(j,i) = List_comb_b3_size_thr(i,j)
+!  enddo
+! enddo
  integer :: list(ao_num)
  do i = 1, ao_num
   list(i) = maxval(List_comb_b3_size_thr(:,i))
  enddo
  max_List_comb_b3_size_thr = maxval(list) 
+ print*,'max_List_comb_b3_size_thr =  ',max_List_comb_b3_size_thr
  
 END_PROVIDER 
 
@@ -143,46 +146,46 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-14
+ thr = 1.d-10
  ao_abs_comb_b3_j1b = 10000000.d0
  do i = 1, ao_num
-  do j = i, ao_num
+  do j = 1, ao_num
    icount = 0
    do i_1s = 1, List_all_comb_b3_size
      coef        = List_all_comb_b3_coef  (i_1s)
-     if(dabs(coef).lt.thr)cycle
      beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-10)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
-     int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
-      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
-      dist += ( center(2) - r(2) )*( center(2) - r(2) )
-      dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
-     enddo
-     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+     if(dabs(coef).lt.thr)cycle
+!     int_j1b = 0.d0
+!     do ipoint = 1, n_points_final_grid
+!      r(1:3) = final_grid_points(1:3,ipoint)
+!      weight = final_weight_at_r_vector(ipoint)
+!      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+!      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+!      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+!      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+!     enddo
+!     if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1
       List_comb_thr_b3_coef(icount,j,i) = coef
       List_comb_thr_b3_expo(icount,j,i) = beta
       List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
-      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
-     endif
+!      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+!     endif
    enddo
   enddo 
  enddo
- do i = 1, ao_num
-  do j = 1, i-1
-    do icount = 1, List_comb_b3_size_thr(j,i)
-     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
-     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
-     List_comb_thr_b3_cent(1,icount,j,i) = List_comb_thr_b3_cent(1,icount,i,j)
-     List_comb_thr_b3_cent(2,icount,j,i) = List_comb_thr_b3_cent(2,icount,i,j)
-     List_comb_thr_b3_cent(3,icount,j,i) = List_comb_thr_b3_cent(3,icount,i,j)
-    enddo
-  enddo
- enddo
- 
+
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_b3_size_thr(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
  
 END_PROVIDER 
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 69953f02..c470d1b4 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -11,39 +11,47 @@ program test_ints
   my_grid_becke  = .True.
 !  my_n_pt_r_grid = 30
 !  my_n_pt_a_grid = 50
-  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 26 ! small grid for quick debug
+  my_n_pt_r_grid = 3 ! small grid for quick debug
+  my_n_pt_a_grid = 6 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 ! call routine_int2_u_grad1u_j1b2 
 ! call routine_v_ij_erf_rk_cst_mu_j1b
 ! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
- call routine_v_ij_u_cst_mu_j1b
+! call routine_v_ij_u_cst_mu_j1b
+
+!
 ! call routine_test_j1b
 
+ call routine_int2_grad1u2_grad2u2_j1b2
 end
 
 subroutine routine_test_j1b
  implicit none
  integer :: i,icount,j
  icount = 0
-! do i = 1, List_all_comb_b2_size
-!  if(dabs(List_all_comb_b2_coef(i)).gt.1.d-10)then
-!   icount += 1
-!  endif
-!  print*,i,List_all_comb_b2_expo(i),List_all_comb_b2_coef(i)
-! enddo
-! print*,'List_all_comb_b2_coef,icount = ',List_all_comb_b2_size
+ do i = 1, List_all_comb_b3_size
+  if(dabs(List_all_comb_b3_coef(i)).gt.1.d-10)then
+   print*,''
+   print*,List_all_comb_b3_expo(i),List_all_comb_b3_coef(i)
+   print*,List_all_comb_b3_cent(1:3,i)
+   print*,''
+   icount += 1
+  endif
+  
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
  do i = 1, ao_num
   do j = 1, ao_num
    do icount = 1, List_comb_b3_size_thr(j,i)
-   print*,List_comb_thr_b3_cent(1:3,icount,j,i)
-!   print*,'',j,i
-!   print*,List_comb_b2_size_thr(j,i),List_comb_b3_size_thr(j,i),ao_overlap_abs_grid(j,i)
+    print*,'',j,i
+    print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+    print*,''
    enddo
+!   enddo
   enddo
  enddo
- print*,'max_List_comb_b2_size_thr = ',max_List_comb_b2_size_thr,List_all_comb_b2_size
- print*,'max_List_comb_b2_size_thr = ',max_List_comb_b3_size_thr,List_all_comb_b3_size
+ print*,'max_List_comb_b3_size_thr = ',max_List_comb_b3_size_thr,List_all_comb_b3_size
 
 end
 
@@ -52,19 +60,6 @@ subroutine routine_int2_u_grad1u_j1b2
  integer :: i,j,ipoint,k,l
  double precision :: weight,accu_relat, accu_abs, contrib
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
-! print*,'ao_overlap_abs = '
-! do i = 1, ao_num
-!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
-! enddo
-! print*,'center = '
-! do i = 1, ao_num
-!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
-! enddo
-! print*,'sigma = '
-! do i = 1, ao_num
-!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
-! enddo
-
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -76,15 +71,7 @@ subroutine routine_int2_u_grad1u_j1b2
    do l = 1, ao_num
     do i = 1, ao_num
      do j = 1, ao_num
-      array(j,i,l,k)     += int2_u_grad1u_j1b2_test_2(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!      if(dabs(int2_u_grad1u_j1b2(j,i,ipoint)).gt.1.d-6)then
-!       if(dabs(int2_u_grad1u_j1b2_test_2(j,i,ipoint)-int2_u_grad1u_j1b2(j,i,ipoint)).gt.1.d-6)then
-!        print*,int2_u_grad1u_j1b2(j,i,ipoint), int2_u_grad1u_j1b2_test_2(j,i,ipoint),dabs(int2_u_grad1u_j1b2_test_2(j,i,ipoint)-int2_u_grad1u_j1b2(j,i,ipoint))
-!        print*,i,j
-!        print*,final_grid_points(:,i)
-!       stop
-!       endif
-!      endif
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
       array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
      enddo
     enddo
@@ -290,4 +277,70 @@ subroutine routine_v_ij_u_cst_mu_j1b
 
   
 
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+ double precision, allocatable :: ints(:,:,:)
+ allocate(ints(ao_num, ao_num, n_points_final_grid))
+ do ipoint = 1, n_points_final_grid
+  do i = 1, ao_num
+   do j = 1, ao_num
+    read(33,*)ints(j,i,ipoint)
+   enddo
+  enddo
+ enddo
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
+        print*,j,i,ipoint
+        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
+        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
+        stop
+       endif
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
 end

From 7578ce34505374cd74835e32d297a131393fdefc Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 9 Dec 2022 18:19:10 +0100
Subject: [PATCH 06/68] added test_int.irp.f

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 148 +++++++-----------
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |   6 +-
 .../grad_lapl_jmu_manu.irp.f                  |   4 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  46 +++---
 src/ao_tc_eff_map/potential.irp.f             |  10 +-
 src/tc_bi_ortho/test_tc_fock.irp.f            |   2 +-
 src/tc_scf/test_int.irp.f                     |  10 +-
 7 files changed, 95 insertions(+), 131 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 9bb16475..c25d8055 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -31,7 +31,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
  !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
  !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr, &
- !$OMP          final_grid_points, n_max_fit_slat,                  &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
  !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
@@ -59,11 +59,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
                       + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
                       + (B_center(3) - r(3)) * (B_center(3) - r(3))
 
-          do i_fit = 1, n_max_fit_slat
+          do i_fit = 1, ng_fit_jast
 
             expo_fit = expo_gauss_j_mu_1_erf(i_fit)
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
+!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
 
             alpha_1s     = beta + expo_fit
@@ -130,31 +130,27 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
 
  int2_grad1u2_grad2u2_j1b2_test_no_v(:,:,:) = 0.d0
 
-! !$OMP PARALLEL DEFAULT (NONE)                                       &
-!     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
-!     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s)                &
-!     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_b3_size_thr,&
-!     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
-!     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
-!     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-!     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test_no_v, ao_abs_comb_b3_j1b,&
-!     !$OMP          ao_overlap_abs,dsqpi_3_2)
-! !$OMP DO SCHEDULE(dynamic)
-!   do i = 1, ao_num
-!     do j = 1, ao_num
-   do i = 14,14
-     do j = 17,17
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_b3_size_thr,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test_no_v, ao_abs_comb_b3_j1b,&
+     !$OMP          ao_overlap_abs,dsqpi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+ do ipoint = 1, n_points_final_grid
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+   do i = 1, ao_num
+     do j = i, ao_num
        if(ao_overlap_abs(j,i) .lt. 1.d-12) then
          cycle
        endif
   
-!       if(ipoint==1)then
-!        if(i+j.lt.10)then
-!        print*,j,i
-!        endif
-!       endif
-!       do i_1s = 1, List_comb_b3_size_thr(j,i)
-       do i_1s = 1, 1
+       do i_1s = 1, List_comb_b3_size_thr(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -163,23 +159,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
-!       if(ipoint==1)then
-!        if(i+j.lt.10)then
-!         print*,coef,beta
-!         print*,B_center
-!        endif
-!       endif
   
-!         do i_fit = 1, n_max_fit_slat
-         do i_fit = 15,15
-        if(j==17.and.i==14)then
-         print*,i_fit,i_1s
-        endif
-! do ipoint = 1, n_points_final_grid
- do ipoint = 4,4
-   r(1) = final_grid_points(1,ipoint)
-   r(2) = final_grid_points(2,ipoint)
-   r(3) = final_grid_points(3,ipoint)
+         do i_fit = 1, ng_fit_jast
   
            expo_fit = expo_gauss_1_erf_x_2(i_fit)
 !           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
@@ -188,9 +169,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
-!           if(ipoint == 4)then
-!            print*,'ipoint == 4 !!'
-!           endif
            int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
   
            int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) += coef_fit * int_gauss 
@@ -201,16 +179,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
      enddo
    enddo
 
-!   !$OMP END DO
-!   !$OMP END PARALLEL
+   !$OMP END DO
+   !$OMP END PARALLEL
 
-!  do ipoint = 1, n_points_final_grid
-!    do i = 1, ao_num
-!      do j = 1, i-1
-!        int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint)
-!      enddo
-!    enddo
-!  enddo
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint)
+      enddo
+    enddo
+  enddo
 
   call wall_time(wall1)
   print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_no_v', wall1 - wall0
@@ -218,13 +196,13 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
 END_PROVIDER
 
 BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
-  !
-  END_DOC
-
+!
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+!
   implicit none
   integer                       :: i, j, ipoint, i_1s, i_fit
   double precision              :: r(3), expo_fit, coef_fit
@@ -240,33 +218,27 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
 
  double precision :: int_j1b
  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
-
-! !$OMP PARALLEL DEFAULT (NONE)                                       &
-!     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
-!     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
-!     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr,&
-!     !$OMP          final_grid_points_transp, n_max_fit_slat,               &
-!     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
-!     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-!     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test,&
-!     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
-
+!
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+!
  allocate(int_fit_v(n_points_final_grid))
  !$OMP DO SCHEDULE(dynamic)
-! do i = 1, ao_num
-!   do j = i, ao_num
-   do i = 14,14
-     do j = 17,17
+ do i = 1, ao_num
+   do j = i, ao_num
 
      if(ao_overlap_abs(j,i) .lt. 1.d-12) then
        cycle
      endif
-!        if(i+j.lt.10)then
-!        print*,j,i
-!        endif
 
-!      do i_1s = 1, List_comb_b3_size_thr(j,i)
-      do i_1s = 1, 1
+      do i_1s = 1, List_comb_b3_size_thr(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -275,22 +247,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
-!        if(i+j.lt.10)then
-!         print*,coef,beta
-!         print*,B_center
-!        endif
 
-!       do i_fit = 1, n_max_fit_slat
-         do i_fit = 15,15
+       do i_fit = 1, ng_fit_jast
 
          expo_fit = expo_gauss_1_erf_x_2(i_fit)
          coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
 
-        if(j==17.and.i==14)then
-         print*,i_fit,i_1s
-        endif
-         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
-               expo_fit, i, j, int_fit_v, n_points_final_grid)
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
 
          do ipoint = 1, n_points_final_grid
            int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
@@ -301,9 +265,9 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      enddo
    enddo
  enddo
-! !$OMP END DO
+ !$OMP END DO
  deallocate(int_fit_v)
-! !$OMP END PARALLEL
+ !$OMP END PARALLEL
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index 39249e0a..872bfaef 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -351,7 +351,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit) .lt. 1d-10) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +375,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            if(expo_coef_1s .gt. 80.d0) cycle
+!            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-10) cycle
 
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index b8c0801a..1b457d68 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -221,7 +221,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
  !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
  !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
  !$OMP SHARED  (n_points_final_grid, ao_num,  & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
  !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_b2_size_thr,       & 
  !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
@@ -248,7 +248,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
 
-          do i_fit = 1, n_max_fit_slat
+          do i_fit = 1, ng_fit_jast
 
             expo_fit = expo_gauss_j_mu_x(i_fit)
             coef_fit = coef_gauss_j_mu_x(i_fit)
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 4ef5db99..606664f8 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -109,18 +109,18 @@ END_PROVIDER
      beta        = List_all_comb_b3_expo  (i_1s)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
-!     int_j1b = 0.d0
-!     do ipoint = 1, n_points_final_grid
-!      r(1:3) = final_grid_points(1:3,ipoint)
-!      weight = final_weight_at_r_vector(ipoint)
-!      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
-!      dist += ( center(2) - r(2) )*( center(2) - r(2) )
-!      dist += ( center(3) - r(3) )*( center(3) - r(3) )
-!      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
-!     enddo
-!     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_final_grid
+      r(1:3) = final_grid_points(1:3,ipoint)
+      weight = final_weight_at_r_vector(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_b3_size_thr(j,i) += 1
-!     endif
+     endif
    enddo
   enddo 
  enddo
@@ -157,22 +157,22 @@ END_PROVIDER
      beta = max(beta,1.d-10)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
-!     int_j1b = 0.d0
-!     do ipoint = 1, n_points_final_grid
-!      r(1:3) = final_grid_points(1:3,ipoint)
-!      weight = final_weight_at_r_vector(ipoint)
-!      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
-!      dist += ( center(2) - r(2) )*( center(2) - r(2) )
-!      dist += ( center(3) - r(3) )*( center(3) - r(3) )
-!      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
-!     enddo
-!     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_final_grid
+      r(1:3) = final_grid_points(1:3,ipoint)
+      weight = final_weight_at_r_vector(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1
       List_comb_thr_b3_coef(icount,j,i) = coef
       List_comb_thr_b3_expo(icount,j,i) = beta
       List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
-!      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
-!     endif
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
    enddo
   enddo 
  enddo
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
index 67d572e5..37291563 100644
--- a/src/ao_tc_eff_map/potential.irp.f
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -165,7 +165,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -175,7 +175,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -185,7 +185,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -195,7 +195,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -205,7 +205,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index a49a5958..26446daf 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -32,7 +32,7 @@ subroutine test_3e
  print*,'htot  = ',htot
  print*,''
  print*,''
- print*,'TC_one= ',TC_HF_one_electron_energy
+ print*,'TC_one= ',tc_hf_one_e_energy
  print*,'TC_two= ',TC_HF_two_e_energy
  print*,'TC_3e = ',diag_three_elem_hf
  print*,'TC_tot= ',TC_HF_energy
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index c470d1b4..1d0b0f8c 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -305,16 +305,16 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
     do i = 1, ao_num
      do j = 1, ao_num
       array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
       array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!      array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!      array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     !array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
        if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
         print*,j,i,ipoint
         print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
-        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
         stop
        endif
       endif

From ee987554e7c63476bb29372800c6998bfb8f8384 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 9 Dec 2022 18:29:52 +0100
Subject: [PATCH 07/68] minor modifs: larger debug grid and switch enabled test routines in test_int

---
 src/tc_scf/test_int.irp.f | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 1d0b0f8c..6961d2f0 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -11,18 +11,18 @@ program test_ints
   my_grid_becke  = .True.
 !  my_n_pt_r_grid = 30
 !  my_n_pt_a_grid = 50
-  my_n_pt_r_grid = 3 ! small grid for quick debug
-  my_n_pt_a_grid = 6 ! small grid for quick debug
+  my_n_pt_r_grid = 10 ! small grid for quick debug
+  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-! call routine_int2_u_grad1u_j1b2 
-! call routine_v_ij_erf_rk_cst_mu_j1b
-! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
-! call routine_v_ij_u_cst_mu_j1b
+ call routine_int2_u_grad1u_j1b2 
+ call routine_v_ij_erf_rk_cst_mu_j1b
+ call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+ call routine_v_ij_u_cst_mu_j1b
 
 !
 ! call routine_test_j1b
 
- call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_grad1u2_grad2u2_j1b2
 end
 
 subroutine routine_test_j1b

From d731e31934b3a12c2ed07bad44cdea00da4bc553 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Sat, 10 Dec 2022 14:14:45 +0100
Subject: [PATCH 08/68] fixed bug in vectorized integ

---
 data/basis/cc-pcvdz                           | 264 ++++++++++++++-
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    |  20 +-
 .../prim_int_gauss_gauss.irp.f                |  66 ++--
 src/ao_tc_eff_map/potential.irp.f             |  10 +-
 src/bi_ortho_mos/bi_density.irp.f             |   2 +
 src/hartree_fock/scf.irp.f                    |  23 +-
 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f | 183 ++++++++++-
 src/scf_utils/diagonalize_fock.irp.f          |   1 -
 src/scf_utils/diis.irp.f                      | 111 +++++++
 src/scf_utils/rh_scf_mo.irp.f                 | 308 ++++++++++++++++++
 src/scf_utils/rh_scf_modif.irp.f              | 196 +++++++++++
 src/scf_utils/rh_scf_simple.irp.f             | 130 ++++++++
 src/scf_utils/roothaan_hall_scf.irp.f         |   3 +-
 src/tc_scf/rh_tcscf.irp.f                     |  24 +-
 src/tc_scf/tc_scf.irp.f                       |   9 +-
 src/utils/integration.irp.f                   | 151 +++++----
 src/utils/one_e_integration.irp.f             |  42 +--
 17 files changed, 1393 insertions(+), 150 deletions(-)
 create mode 100644 src/scf_utils/rh_scf_mo.irp.f
 create mode 100644 src/scf_utils/rh_scf_modif.irp.f
 create mode 100644 src/scf_utils/rh_scf_simple.irp.f

diff --git a/data/basis/cc-pcvdz b/data/basis/cc-pcvdz
index d874fb06..76985d4a 100644
--- a/data/basis/cc-pcvdz
+++ b/data/basis/cc-pcvdz
@@ -991,4 +991,266 @@ D   1
 1         1.3743000              1.0000000
 D   1
 1         0.0537000              1.00000000
-$END
\ No newline at end of file
+
+COPPER
+S   20
+1         5.430321E+06           7.801026E-06
+2         8.131665E+05           6.065666E-05
+3         1.850544E+05           3.188964E-04
+4         5.241466E+04           1.344687E-03
+5         1.709868E+04           4.869050E-03
+6         6.171994E+03           1.561013E-02
+7         2.406481E+03           4.452077E-02
+8         9.972584E+02           1.103111E-01
+9         4.339289E+02           2.220342E-01
+10        1.962869E+02           3.133739E-01
+11        9.104280E+01           2.315121E-01
+12        4.138425E+01           7.640920E-02
+13        1.993278E+01           1.103818E-01
+14        9.581891E+00           1.094372E-01
+15        4.234516E+00           1.836311E-02
+16        1.985814E+00          -6.043084E-04
+17        8.670830E-01           5.092245E-05
+18        1.813390E-01          -5.540730E-05
+19        8.365700E-02           3.969482E-05
+20        3.626700E-02          -1.269538E-05
+S   20
+1         5.430321E+06          -4.404706E-06
+2         8.131665E+05          -3.424801E-05
+3         1.850544E+05          -1.801238E-04
+4         5.241466E+04          -7.600455E-04
+5         1.709868E+04          -2.759348E-03
+6         6.171994E+03          -8.900970E-03
+7         2.406481E+03          -2.579378E-02
+8         9.972584E+02          -6.623861E-02
+9         4.339289E+02          -1.445927E-01
+10        1.962869E+02          -2.440110E-01
+11        9.104280E+01          -2.504837E-01
+12        4.138425E+01           2.852577E-02
+13        1.993278E+01           5.115874E-01
+14        9.581891E+00           4.928061E-01
+15        4.234516E+00           8.788437E-02
+16        1.985814E+00          -5.820281E-03
+17        8.670830E-01           2.013508E-04
+18        1.813390E-01          -5.182553E-04
+19        8.365700E-02           3.731503E-04
+20        3.626700E-02          -1.193171E-04
+S   20
+1         5.430321E+06           9.704682E-07
+2         8.131665E+05           7.549245E-06
+3         1.850544E+05           3.968892E-05
+4         5.241466E+04           1.677200E-04
+5         1.709868E+04           6.095101E-04
+6         6.171994E+03           1.978846E-03
+7         2.406481E+03           5.798049E-03
+8         9.972584E+02           1.534158E-02
+9         4.339289E+02           3.540484E-02
+10        1.962869E+02           6.702098E-02
+11        9.104280E+01           8.026945E-02
+12        4.138425E+01          -1.927231E-02
+13        1.993278E+01          -3.160129E-01
+14        9.581891E+00          -4.573162E-01
+15        4.234516E+00           1.550841E-01
+16        1.985814E+00           7.202872E-01
+17        8.670830E-01           3.885122E-01
+18        1.813390E-01           1.924326E-02
+19        8.365700E-02          -7.103807E-03
+20        3.626700E-02           3.272906E-03
+S   20
+1         5.430321E+06          -1.959354E-07
+2         8.131665E+05          -1.523472E-06
+3         1.850544E+05          -8.014808E-06
+4         5.241466E+04          -3.383992E-05
+5         1.709868E+04          -1.231191E-04
+6         6.171994E+03          -3.992085E-04
+7         2.406481E+03          -1.171900E-03
+8         9.972584E+02          -3.096141E-03
+9         4.339289E+02          -7.171993E-03
+10        1.962869E+02          -1.356621E-02
+11        9.104280E+01          -1.643989E-02
+12        4.138425E+01           4.107628E-03
+13        1.993278E+01           6.693964E-02
+14        9.581891E+00           1.028221E-01
+15        4.234516E+00          -4.422945E-02
+16        1.985814E+00          -2.031191E-01
+17        8.670830E-01          -2.230022E-01
+18        1.813390E-01           2.517975E-01
+19        8.365700E-02           5.650091E-01
+20        3.626700E-02           3.247243E-01
+S   20
+1         5.430321E+06          -7.508267E-07
+2         8.131665E+05          -5.972018E-06
+3         1.850544E+05          -3.039682E-05
+4         5.241466E+04          -1.340405E-04
+5         1.709868E+04          -4.615778E-04
+6         6.171994E+03          -1.601064E-03
+7         2.406481E+03          -4.330942E-03
+8         9.972584E+02          -1.265434E-02
+9         4.339289E+02          -2.586864E-02
+10        1.962869E+02          -5.835428E-02
+11        9.104280E+01          -5.132322E-02
+12        4.138425E+01          -1.908953E-02
+13        1.993278E+01           3.586116E-01
+14        9.581891E+00           3.885818E-01
+15        4.234516E+00          -3.057106E-01
+16        1.985814E+00          -2.069896E+00
+17        8.670830E-01           2.431774E+00
+18        1.813390E-01          -2.121974E-02
+19        8.365700E-02          -1.820251E+00
+20        3.626700E-02           1.434585E+00
+S   20
+1         5.430321E+06          -3.532229E-07
+2         8.131665E+05          -2.798812E-06
+3         1.850544E+05          -1.432517E-05
+4         5.241466E+04          -6.270946E-05
+5         1.709868E+04          -2.179490E-04
+6         6.171994E+03          -7.474316E-04
+7         2.406481E+03          -2.049271E-03
+8         9.972584E+02          -5.885203E-03
+9         4.339289E+02          -1.226885E-02
+10        1.962869E+02          -2.683147E-02
+11        9.104280E+01          -2.479261E-02
+12        4.138425E+01          -5.984746E-03
+13        1.993278E+01           1.557124E-01
+14        9.581891E+00           1.436683E-01
+15        4.234516E+00           8.374103E-03
+16        1.985814E+00          -7.460711E-01
+17        8.670830E-01           1.244367E-01
+18        1.813390E-01           1.510110E+00
+19        8.365700E-02          -3.477122E-01
+20        3.626700E-02          -9.774169E-01
+S   1
+1         3.626700E-02           1.000000E+00
+S   1
+1         0.0157200              1.0000000
+P   16
+1         2.276057E+04           4.000000E-05
+2         5.387679E+03           3.610000E-04
+3         1.749945E+03           2.083000E-03
+4         6.696653E+02           9.197000E-03
+5         2.841948E+02           3.266000E-02
+6         1.296077E+02           9.379500E-02
+7         6.225415E+01           2.082740E-01
+8         3.092964E+01           3.339930E-01
+9         1.575827E+01           3.324930E-01
+10        8.094211E+00           1.547280E-01
+11        4.046921E+00           2.127100E-02
+12        1.967869E+00          -1.690000E-03
+13        9.252950E-01          -1.516000E-03
+14        3.529920E-01          -2.420000E-04
+15        1.273070E-01           2.300000E-05
+16        4.435600E-02          -9.000000E-06
+P   16
+1         2.276057E+04          -1.500000E-05
+2         5.387679E+03          -1.310000E-04
+3         1.749945E+03          -7.550000E-04
+4         6.696653E+02          -3.359000E-03
+5         2.841948E+02          -1.208100E-02
+6         1.296077E+02          -3.570300E-02
+7         6.225415E+01          -8.250200E-02
+8         3.092964E+01          -1.398900E-01
+9         1.575827E+01          -1.407290E-01
+10        8.094211E+00           3.876600E-02
+11        4.046921E+00           3.426950E-01
+12        1.967869E+00           4.523100E-01
+13        9.252950E-01           2.770540E-01
+14        3.529920E-01           4.388500E-02
+15        1.273070E-01          -2.802000E-03
+16        4.435600E-02           1.152000E-03
+P   16
+1         2.276057E+04           5.000000E-06
+2         5.387679E+03           4.900000E-05
+3         1.749945E+03           2.780000E-04
+4         6.696653E+02           1.253000E-03
+5         2.841948E+02           4.447000E-03
+6         1.296077E+02           1.337000E-02
+7         6.225415E+01           3.046900E-02
+8         3.092964E+01           5.344700E-02
+9         1.575827E+01           5.263900E-02
+10        8.094211E+00          -1.688100E-02
+11        4.046921E+00          -1.794480E-01
+12        1.967869E+00          -2.095880E-01
+13        9.252950E-01          -3.963300E-02
+14        3.529920E-01           5.021300E-01
+15        1.273070E-01           5.811110E-01
+16        4.435600E-02           4.566600E-02
+P   16
+1         2.276057E+04           1.100000E-05
+2         5.387679E+03           9.600000E-05
+3         1.749945E+03           5.900000E-04
+4         6.696653E+02           2.484000E-03
+5         2.841948E+02           9.463000E-03
+6         1.296077E+02           2.645300E-02
+7         6.225415E+01           6.568900E-02
+8         3.092964E+01           1.027320E-01
+9         1.575827E+01           1.370410E-01
+10        8.094211E+00          -7.096100E-02
+11        4.046921E+00          -5.047080E-01
+12        1.967869E+00          -4.780560E-01
+13        9.252950E-01           9.428920E-01
+14        3.529920E-01           5.446990E-01
+15        1.273070E-01          -8.327660E-01
+16        4.435600E-02          -1.084160E-01
+P   16
+1         2.276057E+04           3.000000E-06
+2         5.387679E+03           2.500000E-05
+3         1.749945E+03           1.470000E-04
+4         6.696653E+02           6.560000E-04
+5         2.841948E+02           2.351000E-03
+6         1.296077E+02           7.004000E-03
+7         6.225415E+01           1.613100E-02
+8         3.092964E+01           2.777000E-02
+9         1.575827E+01           2.756700E-02
+10        8.094211E+00          -1.011500E-02
+11        4.046921E+00          -8.100900E-02
+12        1.967869E+00          -1.104090E-01
+13        9.252950E-01          -7.173200E-02
+14        3.529920E-01           1.879300E-01
+15        1.273070E-01           5.646290E-01
+16        4.435600E-02           4.070000E-01
+P   1
+1         4.435600E-02           1.000000E+00
+P   1
+1         0.0154500              1.0000000
+D   8
+1         1.738970E+02           2.700000E-03
+2         5.188690E+01           2.090900E-02
+3         1.934190E+01           8.440800E-02
+4         7.975720E+00           2.139990E-01
+5         3.398230E+00           3.359800E-01
+6         1.409320E+00           3.573010E-01
+7         5.488580E-01           2.645780E-01
+8         1.901990E-01           1.039720E-01
+D   8
+1         1.738970E+02          -3.363000E-03
+2         5.188690E+01          -2.607900E-02
+3         1.934190E+01          -1.082310E-01
+4         7.975720E+00          -2.822170E-01
+5         3.398230E+00          -3.471900E-01
+6         1.409320E+00           2.671100E-02
+7         5.488580E-01           4.920470E-01
+8         1.901990E-01           4.384220E-01
+D   8
+1         1.738970E+02           4.133000E-03
+2         5.188690E+01           3.308500E-02
+3         1.934190E+01           1.383360E-01
+4         7.975720E+00           3.901660E-01
+5         3.398230E+00           1.698420E-01
+6         1.409320E+00          -6.830180E-01
+7         5.488580E-01          -2.657970E-01
+8         1.901990E-01           8.380630E-01
+D   1
+1         1.901990E-01           1.000000E+00
+D   1
+1         0.0659100              1.0000000
+F   1
+1         5.082100E+00           1.000000E+00
+F   1
+1         1.279700E+00           1.000000E+00
+F   1
+1         0.4617200              1.0000000
+G   1
+1         3.483500E+00           1.0000000
+G   1
+1         1.4597900              1.0000000
+$END
diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..4e091818 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -177,7 +177,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_
   double precision, allocatable :: analytical_j(:)
 
   resv(:) = 0.d0
-  if(ao_overlap_abs(j,i).lt.1.d-12) then
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
     return
   endif
 
@@ -313,9 +313,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   ASSERT(beta .gt. 0.d0)
 
   if(beta .lt. 1d-10) then
-
     call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
-
     return
   endif
 
@@ -332,19 +330,20 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
   A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
-  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
 
   bg  = beta  * gama_inv
   dg  = delta * gama_inv
   bdg = bg * delta 
-  do ipoint=1,n_points
+
+  do ipoint = 1, n_points
+
     G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
     G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
     G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
-    fact_g(ipoint) = bdg * ( &
-          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
-        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
-        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
 
     if(fact_g(ipoint) < 10d0) then
       fact_g(ipoint) = dexp(-fact_g(ipoint))
@@ -368,8 +367,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
       do ipoint = 1, n_points
         coef12f = coef12 * fact_g(ipoint)
         resv(ipoint) += coef12f * analytical_j(ipoint)
-      end do
-
+      enddo
     enddo
   enddo
 
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..96893619 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -1,5 +1,9 @@
-double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math                      ::
@@ -8,50 +12,60 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   !
   END_DOC
 
-  implicit none
   include 'constants.include.F'
-  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
-  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
-  integer, intent(in)            :: power_A(3),power_B(3)
 
-  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
   ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
-  double precision               :: A_new(0:max_dim,3)! new polynom
-  double precision               :: A_center_new(3)   ! new center
-  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
-  double precision               :: alpha_new         ! new exponent
-  double precision               :: fact_a_new        ! constant factor
-  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
-  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
-  dim1=100
-  thr = 1.d-10
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100
+  thr  = 1.d-10
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
-  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
-      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  call give_explicit_poly_and_gaussian( A_new, A_center_new , alpha_new, fact_a_new, iorder_a_new &
+                                      , delta, alpha, d, power_A, D_center, A_center, n_pt_max_integrals)
+
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
     coefx = A_new(lx,1)
-    if(dabs(coefx).lt.thr)cycle
+    if(dabs(coefx) .lt. thr) cycle
     iorder_tmp(1) = lx
+
     do ly = 0, iorder_a_new(2)
-      coefy = A_new(ly,2)
+      coefy  = A_new(ly,2)
       coefxy = coefx * coefy
-      if(dabs(coefxy).lt.thr)cycle
+      if(dabs(coefxy) .lt. thr) cycle
       iorder_tmp(2) = ly
+
       do lz = 0, iorder_a_new(3)
-        coefz = A_new(lz,3)
+        coefz   = A_new(lz,3)
         coefxyz = coefxy * coefz
-        if(dabs(coefxyz).lt.thr)cycle
+        if(dabs(coefxyz) .lt. thr) cycle
         iorder_tmp(3) = lz
-        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
         accu += coefxyz * overlap
       enddo
     enddo
   enddo
+
   overlap_gauss_r12 = fact_a_new * accu
+
 end
 
 !---
@@ -95,11 +109,9 @@ subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_
 
   maxab = maxval(power_A(1:3))
 
-  allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
 
-  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new,   &
-        alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, &
-        D_center, LD_D, A_center, n_points)
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
 
   rvec(:) = 0.d0
 
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
index 67d572e5..37291563 100644
--- a/src/ao_tc_eff_map/potential.irp.f
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -165,7 +165,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -175,7 +175,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -185,7 +185,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -195,7 +195,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -205,7 +205,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
index 0de8ce69..56f44da1 100644
--- a/src/bi_ortho_mos/bi_density.irp.f
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -10,6 +10,7 @@ BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
  END_DOC
   call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
+            !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
 END_PROVIDER
 
@@ -24,6 +25,7 @@ BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
  END_DOC
   call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
+            !, mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
 END_PROVIDER
 
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
index 3226073d..f4123c85 100644
--- a/src/hartree_fock/scf.irp.f
+++ b/src/hartree_fock/scf.irp.f
@@ -68,20 +68,33 @@ subroutine create_guess
   endif
 end
 
-subroutine run
+! ---
+
+subroutine run()
 
   BEGIN_DOC
-!   Run SCF calculation
+  ! Run SCF calculation
   END_DOC
 
   use bitmasks
   implicit none
 
-  integer                        :: i_it, i, j, k
-
   mo_label = 'Orthonormalized'
 
-  call Roothaan_Hall_SCF
+  PROVIDE scf_algorithm
+
+  if(scf_algorithm .eq. "DIIS_MO") then
+    call Roothaan_Hall_SCF_MO()
+  elseif(scf_algorithm .eq. "DIIS_MODIF") then
+    call Roothaan_Hall_SCF_MODIF()
+  elseif(scf_algorithm .eq. "DIIS") then
+    call Roothaan_Hall_SCF()
+  elseif(scf_algorithm .eq. "Simple") then
+    call Roothaan_Hall_SCF_Simple()
+  else
+    print *, ' not implemented yet:', scf_algorithm
+  endif
+
   call ezfio_set_hartree_fock_energy(SCF_energy)
 
 end
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
index bb585f63..ca00b816 100644
--- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -17,7 +17,7 @@ program debug_integ_jmu_modif
 
   PROVIDE mu_erf j1b_pen
 
-  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_u_cst_mu_j1b()
 !  call test_v_ij_erf_rk_cst_mu_j1b()
 !  call test_x_v_ij_erf_rk_cst_mu_j1b()
 !  call test_int2_u2_j1b2()
@@ -31,6 +31,9 @@ program debug_integ_jmu_modif
 !  call test_u12_grad1_u12_j1b_grad1_j1b()
 !  !call test_gradu_squared_u_ij_mu()
 
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
 end
 
 ! ---
@@ -595,7 +598,183 @@ subroutine test_u12_grad1_u12_j1b_grad1_j1b()
   print*, ' normalz = ', normalz
 
   return
-end subroutine test_u12_grad1_u12_j1b_grad1_j1b,
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
 
 ! ---
 
+subroutine test_vect_overlap_gauss_r12_ao()
+  ! Debug check: compare overlap_gauss_r12_ao_v (vectorized over grid points) with the scalar overlap_gauss_r12_ao.
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! --- vectorized path: one call per AO pair (i,j) fills int_fit_v for all grid points
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- reference path: scalar function called for every grid point and AO pair
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- comparison: stop at the first absolute difference larger than eps_ij
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! i_exc is the scalar (reference) value, i_num the vectorized one
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+        ! acc_tot sums the absolute errors, normalz sums |vectorized value|
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+  ! Debug check: compare overlap_gauss_r12_ao_with1s_v (vectorized over grid points) with the scalar overlap_gauss_r12_ao_with1s.
+  implicit none
+
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  ! the extra 1s gaussian (exponent beta, center B_center) is entry 2 of the List_all_comb_b3 arrays
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! --- vectorized path: one call per AO pair (i,j) fills int_fit_v for all grid points
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- reference path: scalar function called for every grid point and AO pair
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- comparison: stop at the first absolute difference larger than eps_ij
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! i_exc is the scalar (reference) value, i_num the vectorized one
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_with1s_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+        ! acc_tot sums the absolute errors, normalz sums |vectorized value|
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao_with1s
+
diff --git a/src/scf_utils/diagonalize_fock.irp.f b/src/scf_utils/diagonalize_fock.irp.f
index a567b9c7..da1d44a7 100644
--- a/src/scf_utils/diagonalize_fock.irp.f
+++ b/src/scf_utils/diagonalize_fock.irp.f
@@ -57,7 +57,6 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    do i = elec_beta_num+1, elec_alpha_num
      F(i,i) += 0.5d0*level_shift
    enddo
-
    do i = elec_alpha_num+1, mo_num
      F(i,i) += level_shift
    enddo
diff --git a/src/scf_utils/diis.irp.f b/src/scf_utils/diis.irp.f
index 713de1b3..00d4addb 100644
--- a/src/scf_utils/diis.irp.f
+++ b/src/scf_utils/diis.irp.f
@@ -1,3 +1,5 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
  implicit none
  BEGIN_DOC
@@ -12,6 +14,8 @@ BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
   implicit none
   BEGIN_DOC
@@ -60,6 +64,8 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
   implicit none
   begin_doc
@@ -69,6 +75,7 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
      FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1))
 END_PROVIDER
 
+! ---
 
  BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ]
 &BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ]
@@ -137,3 +144,107 @@ END_PROVIDER
 
 END_PROVIDER
 
+! ---
+
+!BEGIN_PROVIDER [double precision, error_diis_Fmo, (ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! error_diis_Fmo = (S x C) x [F_mo x \eta_occ - \eta_occ x F_mo] x (S x C).T
+!  !
+!  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j
+!  double precision, allocatable :: tmp(:,:)
+!
+!  provide Fock_matrix_mo
+!
+!  allocate(tmp(mo_num,mo_num))
+!  tmp = 0.d0
+!
+!  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+!  do j = 1, elec_alpha_num
+!    do i = elec_alpha_num + 1, mo_num
+!      tmp(i,j) = Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_alpha_num + 1, mo_num
+!    do i = 1, elec_alpha_num
+!      tmp(i,j) = -Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+!  do j = 1, elec_beta_num
+!    do i = elec_beta_num + 1, mo_num
+!      tmp(i,j) += Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_beta_num + 1, mo_num
+!    do i = 1, elec_beta_num
+!      tmp(i,j) -= Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  call mo_to_ao(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+!  
+!  deallocate(tmp)
+!
+!END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, error_diis_Fmo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! DIIS error matrix built directly in the MO basis from Fock_matrix_mo :
+  ! error_diis_Fmo = [F_mo x \eta_occ - \eta_occ x F_mo]
+  !
+  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+  ! Only elements coupling an occupied and a virtual orbital (alpha or beta) are filled.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j
+  double precision, allocatable :: tmp(:,:)
+  ! tmp is only referenced by the commented-out MO -> AO -> MO round trip at the end
+  provide Fock_matrix_mo
+
+  error_diis_Fmo = 0.d0
+
+  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+  do j = 1, elec_alpha_num
+    do i = elec_alpha_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_alpha_num + 1, mo_num
+    do i = 1, elec_alpha_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+  do j = 1, elec_beta_num
+    do i = elec_beta_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_beta_num + 1, mo_num
+    do i = 1, elec_beta_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  ! Disabled: round trip of the error matrix through the AO basis
+  !allocate(tmp(ao_num,ao_num))
+  !call mo_to_ao(error_diis_Fmo, size(error_diis_Fmo, 1), tmp, size(tmp, 1))
+  !call ao_to_mo(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+  !deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/scf_utils/rh_scf_mo.irp.f b/src/scf_utils/rh_scf_mo.irp.f
new file mode 100644
index 00000000..5b70fb9c
--- /dev/null
+++ b/src/scf_utils/rh_scf_mo.irp.f
@@ -0,0 +1,308 @@
+! ---
+
+subroutine Roothaan_Hall_SCF_MO()
+
+  BEGIN_DOC
+  !
+  ! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+  ! DIIS is applied to Fock_matrix_mo, with error_diis_Fmo as error matrix (both in the MO basis).
+  END_DOC
+
+  implicit none
+
+  double precision              :: energy_SCF, energy_SCF_previous, Delta_energy_SCF
+  double precision              :: max_error_DIIS
+  double precision, allocatable :: Fock_matrix_DIIS(:,:,:), error_matrix_DIIS(:,:,:)
+
+  integer                       :: iteration_SCF, dim_DIIS, index_dim_DIIS
+
+  integer                       :: i, j
+  double precision              :: level_shift_save
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  logical, external             :: qp_stop
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate( mo_coef_save(ao_num,mo_num)                   &
+          , Fock_matrix_DIIS (mo_num,mo_num,max_dim_DIIS) &
+          , error_matrix_DIIS(mo_num,mo_num,max_dim_DIIS) )
+
+  Fock_matrix_DIIS  = 0.d0
+  error_matrix_DIIS = 0.d0
+  mo_coef_save      = 0.d0
+
+  call write_time(6)
+
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+! (runs until both the DIIS error and the energy change are converged, or n_it_SCF_max is reached)
+  PROVIDE Fock_matrix_mo error_diis_Fmo
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+    iteration_SCF += 1
+    if(frozen_orb_scf) then
+     call initialize_mo_coef_begin_iteration
+    endif
+    ! Current size of the DIIS space
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS)
+
+    if( (scf_algorithm == 'DIIS_MO').and.(dabs(Delta_energy_SCF) > 1.d-6))  then
+    !if(scf_algorithm == 'DIIS_MO') then
+      ! Store the current Fock and error matrices in slot index_dim_DIIS
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS) + 1
+      do j = 1, mo_num
+        do i = 1, mo_num
+          Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_mo(i,j)
+          error_matrix_DIIS(i,j,index_dim_DIIS) = error_diis_Fmo(i,j)
+        enddo
+      enddo
+      ! Overwrite Fock_matrix_mo with the extrapolated matrix and refresh its diagonal
+      call extrapolate_Fock_matrix_mo(error_matrix_DIIS, Fock_matrix_DIIS, Fock_matrix_mo, size(Fock_matrix_mo, 1), iteration_SCF, dim_DIIS)
+      do i = 1, mo_num
+        Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i)
+      enddo
+      TOUCH Fock_matrix_mo fock_matrix_diag_mo
+    endif
+    ! New orbitals from the diagonalization of the (possibly extrapolated) Fock matrix
+    mo_coef = eigenvectors_Fock_matrix_mo
+    if(frozen_orb_scf) then
+      call reorder_core_orb
+      call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH mo_coef
+    ! Convergence measure: largest element of the MO-basis DIIS error
+    max_error_DIIS = maxval(Abs(error_diis_Fmo))
+    ! SCF energy
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    ! Energy went up: fall back to the Fock matrix held in slot index_dim_DIIS
+    if( (SCF_algorithm == 'DIIS_MO') .and. (Delta_energy_SCF > 0.d0) ) then
+      Fock_matrix_MO(1:mo_num,1:mo_num) = Fock_matrix_DIIS(1:mo_num,1:mo_num,index_dim_DIIS)
+      do i = 1, mo_num
+        Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i)
+      enddo
+      TOUCH Fock_matrix_mo fock_matrix_diag_mo
+      mo_coef = eigenvectors_Fock_matrix_mo
+      max_error_DIIS = maxval(Abs(error_diis_Fmo))
+      energy_SCF = SCF_energy
+      Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    endif
+    ! While the energy still increases: restore the orbitals, raise the level shift, rediagonalize
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    do while(Delta_energy_SCF > 0.d0)
+      mo_coef(1:ao_num,1:mo_num) = mo_coef_save(1:ao_num,1:mo_num)
+      if(level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef level_shift
+      mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_mo(1:ao_num,1:mo_num)
+      if(frozen_orb_scf) then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if(level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      ! Reset the DIIS space size
+      dim_DIIS=0
+    enddo
+    ! Halve the level shift before the next iteration
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+    ! Orbitals are saved only when the energy decreased
+    if(Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+
+    if(qp_stop()) exit
+  enddo
+
+!
+! End of Main SCF loop
+!
+  ! The 'Canonical' label is set only if the loop ended before n_it_SCF_max iterations
+  if(iteration_SCF < n_it_SCF_max) then
+    mo_label = 'Canonical'
+  endif
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo, size(Fock_matrix_mo, 1), size(Fock_matrix_mo, 2), mo_label, 1, .true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
+! ---
+
+subroutine extrapolate_Fock_matrix_mo(error_matrix_DIIS, Fock_matrix_DIIS, Fock_matrix_MO_, size_Fock_matrix_MO, iteration_SCF, dim_DIIS)
+
+  BEGIN_DOC
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  END_DOC
+  ! If B cannot be factorized or is ill-conditioned, dim_DIIS is set to 0 and Fock_matrix_MO_ is left unchanged.
+  implicit none
+
+  integer,intent(inout)         :: dim_DIIS
+  double precision,intent(in)   :: Fock_matrix_DIIS(mo_num,mo_num,dim_DIIS), error_matrix_DIIS(mo_num,mo_num,dim_DIIS)
+  integer,intent(in)            :: iteration_SCF, size_Fock_matrix_MO
+  double precision,intent(inout):: Fock_matrix_MO_(size_Fock_matrix_MO,mo_num)
+
+  double precision,allocatable  :: B_matrix_DIIS(:,:),X_vector_DIIS(:)
+  double precision,allocatable  :: C_vector_DIIS(:)
+
+  double precision,allocatable  :: scratch(:,:)
+  integer                       :: i,j,k,l,i_DIIS,j_DIIS
+  double precision :: rcond, ferr, berr
+  integer, allocatable :: iwork(:)
+  integer :: lwork
+  ! Nothing to extrapolate from an empty DIIS space (ferr and berr above are never used)
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate(                               &
+    B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), &
+    X_vector_DIIS(dim_DIIS+1),            &
+    C_vector_DIIS(dim_DIIS+1),            &
+    scratch(mo_num,mo_num)                &
+  )
+
+  ! Compute the matrices B and X
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(iteration_SCF-j, max_dim_DIIS) + 1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(iteration_SCF-i, max_dim_DIIS) + 1)
+
+      ! Compute product of two errors vectors
+      do l = 1, mo_num
+        do k = 1, mo_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + error_matrix_DIIS(k,l,i_DIIS) * error_matrix_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+! Pad B matrix and build the X matrix
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+  ! The mo_num x mo_num scratch was never used; free it so it can be re-allocated as LAPACK workspace
+  deallocate(scratch)
+
+! Estimate condition number of B
+  double precision :: anorm
+  integer              :: info
+  integer,allocatable  :: ipiv(:)
+  double precision, allocatable :: AF(:,:)
+  double precision, external :: dlange
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+  ! 1-norm of B, passed to dgecon below
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+  ! LU-factorize a copy of B so that B itself is still intact for dgesv
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+  ! Reciprocal condition number of B from its LU factors
+  call dgecon( '1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+  ! Give up on the extrapolation when B is numerically singular
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B.X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1 , 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv, X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork)
+  ! Only an illegal-argument return (info < 0) is treated as fatal here
+  if(info < 0) then
+    stop 'bug in DIIS_MO'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  ! Fock_matrix_MO_ = sum_k X(k) * Fock_matrix_DIIS(:,:,dim_DIIS-k+1), skipping coefficients below 1.d-10
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (mo_num > 200)
+  do j = 1, mo_num
+    do i = 1, mo_num
+      Fock_matrix_MO_(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1, mo_num
+        ! FPE here
+        Fock_matrix_MO_(i,j) = Fock_matrix_MO_(i,j) + X_vector_DIIS(k) * Fock_matrix_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+end
+
diff --git a/src/scf_utils/rh_scf_modif.irp.f b/src/scf_utils/rh_scf_modif.irp.f
new file mode 100644
index 00000000..c63871f3
--- /dev/null
+++ b/src/scf_utils/rh_scf_modif.irp.f
@@ -0,0 +1,196 @@
+subroutine Roothaan_Hall_SCF_MODIF
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+  ! DIIS extrapolates Fock_matrix_AO (error matrix FPS_SPF_matrix_AO); the result is transformed to the MO basis.
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  double precision, allocatable  :: Fock_matrix_DIIS(:,:,:),error_matrix_DIIS(:,:,:)
+  ! NOTE: max_error_DIIS_alpha and max_error_DIIS_beta are declared but not used in this routine
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate(mo_coef_save(ao_num,mo_num),                          &
+      Fock_matrix_DIIS (ao_num,ao_num,max_dim_DIIS),                 &
+      error_matrix_DIIS(ao_num,ao_num,max_dim_DIIS)                  &
+      )
+
+  Fock_matrix_DIIS = 0.d0
+  error_matrix_DIIS = 0.d0
+  mo_coef_save = 0.d0
+
+  call write_time(6)
+
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize energies and density matrices
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  PROVIDE FPS_SPF_matrix_AO Fock_matrix_AO 
+
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+! Increment cycle number
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+
+    if( (scf_algorithm == 'DIIS_MODIF') .and. (dabs(Delta_energy_SCF) > 1.d-6) )  then
+    !if(scf_algorithm == 'DIIS_MODIF') then
+
+      ! Store Fock and error matrices at each iteration
+      index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+      do j=1,ao_num
+        do i=1,ao_num
+          Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j)
+          error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j)
+        enddo
+      enddo
+
+      ! Compute the extrapolated Fock matrix
+
+      call extrapolate_Fock_matrix(                                    &
+          error_matrix_DIIS,Fock_matrix_DIIS,                          &
+          Fock_matrix_AO,size(Fock_matrix_AO,1),                       &
+          iteration_SCF,dim_DIIS                                       &
+          )
+      call ao_to_mo(Fock_matrix_AO, size(Fock_matrix_AO, 1), Fock_matrix_MO, size(Fock_matrix_MO, 1))
+      do i = 1, mo_num
+        Fock_matrix_diag_MO(i) = Fock_matrix_MO(i,i)
+      enddo
+      TOUCH Fock_matrix_MO Fock_matrix_diag_MO
+      ! Disabled: propagate the extrapolated matrix to the alpha/beta AO Fock matrices
+      !Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
+      !Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      !TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
+    endif
+    ! New orbitals from the diagonalization of the (possibly extrapolated) Fock matrix
+    MO_coef = eigenvectors_Fock_matrix_MO
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+
+    TOUCH MO_coef
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy
+
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    if( (SCF_algorithm == 'DIIS_MODIF') .and. (Delta_energy_SCF > 0.d0) ) then
+      Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo(Fock_matrix_AO, size(Fock_matrix_AO, 1), Fock_matrix_MO, size(Fock_matrix_MO, 1))
+      do i = 1, mo_num
+        Fock_matrix_diag_MO(i) = Fock_matrix_MO(i,i)
+      enddo
+      TOUCH Fock_matrix_MO Fock_matrix_diag_MO
+      ! Disabled: propagate the restored matrix to the alpha/beta AO Fock matrices
+      !Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
+      !Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      !TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
+    endif
+    ! While the energy still increases: restore the orbitals, raise the level shift, rediagonalize
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef(1:ao_num,1:mo_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef level_shift
+      mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      ! Reset the DIIS space size
+      dim_DIIS=0
+    enddo
+    ! Halve the level shift before the next iteration
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+    ! Orbitals are saved only when the energy decreased
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if (qp_stop()) exit
+
+  enddo
+  ! The 'Canonical' label is set only if the loop ended before n_it_SCF_max iterations
+ if (iteration_SCF < n_it_SCF_max) then
+   mo_label = 'Canonical'
+ endif
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      size(Fock_matrix_mo,2),mo_label,1,.true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/rh_scf_simple.irp.f b/src/scf_utils/rh_scf_simple.irp.f
new file mode 100644
index 00000000..59b12749
--- /dev/null
+++ b/src/scf_utils/rh_scf_simple.irp.f
@@ -0,0 +1,130 @@
+subroutine Roothaan_Hall_SCF_Simple
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation (simple variant: level shift only, no DIIS extrapolation)
+END_DOC
+
+  implicit none
+! Iteration counter, energies and convergence measure; dim_DIIS is only set to 0 here and printed.
+  integer                        :: iteration_SCF, dim_DIIS
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS
+! NOTE(review): i and j are declared but never used in this subroutine.
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  double precision, allocatable :: mo_coef_save(:,:)
+! PROVIDE asks IRPF90 to have these entities available before going further.
+  PROVIDE ao_md5 mo_occ level_shift
+! Scratch copy of the MO coefficients, restored below whenever a step raises the energy.
+  allocate(mo_coef_save(ao_num,mo_num))
+
+
+  dim_DIIS     = 0
+  mo_coef_save = 0.d0
+
+  call write_time(6)
+! Report the energy of the starting orbitals and print the header of the iteration table.
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize the reference energy, the iteration counter and the convergence measures
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  max_error_DIIS      = 1.d0
+! Loop until both the error and the energy change are within thresholds, or n_it_SCF_max is reached.
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+!   New orbitals are the eigenvectors of the Fock matrix; TOUCH tells IRPF90 that MO_coef was modified.
+    MO_coef = eigenvectors_Fock_matrix_MO
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+    TOUCH MO_coef
+
+!   Convergence measure: largest absolute element of FPS_SPF_Matrix_MO
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy and its change with respect to the previous iteration
+
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+!   While the energy goes up: restore the orbitals saved just below, raise the level shift, diagonalize again.
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    do while (Delta_energy_SCF > 0.d0)
+      mo_coef(1:ao_num,1:mo_num) = mo_coef_save
+      if (level_shift <= .1d0) then
+        level_shift = 1.d0
+      else
+        level_shift = level_shift * 3.0d0
+      endif
+      TOUCH mo_coef level_shift
+      mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num)
+      if(frozen_orb_scf)then
+        call reorder_core_orb
+        call initialize_mo_coef_begin_iteration
+      endif
+      TOUCH mo_coef
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
+      energy_SCF = SCF_energy
+      if (level_shift-level_shift_save > 40.d0) then
+        level_shift = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift
+        exit
+      endif
+!     (the exit above gives up once the shift exceeds its saved value by more than 40)
+    enddo
+!   Halve the level shift before the next iteration and keep the current energy as reference.
+    level_shift = level_shift * 0.5d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+!   save_mos is called only when the energy decreased; qp_stop() allows leaving the loop early.
+    if(Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+    if(qp_stop()) exit
+
+  enddo
+! The orbitals are labelled Canonical only when the loop ended before n_it_SCF_max iterations.
+  if (iteration_SCF < n_it_SCF_max) then
+    mo_label = 'Canonical'
+  endif
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+! Unless frozen_orb_scf is set: final diagonalization of Fock_matrix_mo, symmetry cleanup, orthonormalization, save.
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      size(Fock_matrix_mo,2),mo_label,1,.true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 56a1ed8e..45522079 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -66,7 +66,8 @@ END_DOC
 
     dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
 
-    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
+    if( (scf_algorithm == 'DIIS') .and. (dabs(Delta_energy_SCF) > 1.d-6))  then
+    !if(scf_algorithm == 'DIIS') then
 
       ! Store Fock and error matrices at each iteration
       index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
diff --git a/src/tc_scf/rh_tcscf.irp.f b/src/tc_scf/rh_tcscf.irp.f
index 597c3e67..0312df5f 100644
--- a/src/tc_scf/rh_tcscf.irp.f
+++ b/src/tc_scf/rh_tcscf.irp.f
@@ -67,10 +67,9 @@ subroutine rh_tcscf()
     iteration_TCSCF += 1
     if(iteration_TCSCF > n_it_TCSCF_max) then
       print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
-      exit
+      stop
     endif
 
-    ! current size of the DIIS space
     dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
 
     ! ---
@@ -86,10 +85,7 @@ subroutine rh_tcscf()
         enddo
       enddo
 
-      ! Compute the extrapolated Fock matrix
-      call extrapolate_TC_Fock_matrix( e_DIIS, F_DIIS                                        &
-                                     , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
-                                     , iteration_TCSCF, dim_DIIS )
+      call extrapolate_TC_Fock_matrix(e_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), iteration_TCSCF, dim_DIIS)
 
       Fock_matrix_tc_ao_alpha = 0.5d0 * Fock_matrix_tc_ao_tot
       Fock_matrix_tc_ao_beta  = 0.5d0 * Fock_matrix_tc_ao_tot
@@ -100,7 +96,6 @@ subroutine rh_tcscf()
       call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta , size(Fock_matrix_tc_ao_beta , 1) &
                             , Fock_matrix_tc_mo_beta , size(Fock_matrix_tc_mo_beta , 1) )
       TOUCH Fock_matrix_tc_mo_alpha Fock_matrix_tc_mo_beta
-
     endif
 
     ! ---
@@ -121,9 +116,10 @@ subroutine rh_tcscf()
 
     ! ---
 
-    do while((dabs(delta_energy_tmp) > 0.1d0) .and. (iteration_TCSCF > 1))
-!      print *, ' very big step  : ', delta_energy_tmp
-!      print *, ' TC level shift = ', level_shift_TCSCF
+    do while((delta_gradie_tmp > 1.d-7) .and. (iteration_TCSCF > 1))
+    !do while((dabs(delta_energy_tmp) > 0.5d0) .and. (iteration_TCSCF > 1))
+      print *, ' very big or bad step  : ', delta_energy_tmp, delta_gradie_tmp
+      print *, ' TC level shift = ', level_shift_TCSCF
 
       mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
       mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
@@ -139,7 +135,8 @@ subroutine rh_tcscf()
       mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
       TOUCH mo_l_coef mo_r_coef
 
-      delta_energy_tmp = TC_HF_energy - energy_TCSCF_previous
+      delta_energy_tmp = TC_HF_energy    - energy_TCSCF_previous
+      delta_gradie_tmp = grad_non_hermit - gradie_TCSCF_previous
 
       if(level_shift_TCSCF - level_shift_save > 40.d0) then
         level_shift_TCSCF = level_shift_save * 4.d0
@@ -183,7 +180,7 @@ subroutine rh_tcscf()
     print *, ' 1-e   TC energy   = ', energy_TCSCF_1e
     print *, ' 2-e   TC energy   = ', energy_TCSCF_2e
     print *, ' 3-e   TC energy   = ', energy_TCSCF_3e
-    print *, ' |delta TC energy| = ', delta_energy_TCSCF
+    print *, ' |delta TC energy| = ', dabs(delta_energy_TCSCF)
     print *, ' TC gradient       = ', gradie_TCSCF
     print *, ' delta TC gradient = ', delta_gradie_TCSCF
     print *, ' max TC DIIS error = ', max_error_DIIS_TCSCF 
@@ -199,6 +196,9 @@ subroutine rh_tcscf()
 
   ! ---
 
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
   call write_time(6)
 
   deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, e_DIIS)
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 283ec2ae..c84c837f 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -21,8 +21,11 @@ program tc_scf
   PROVIDE tcscf_algorithm
   if(tcscf_algorithm == 'DIIS') then
     call rh_tcscf()
-  else
+  elseif(tcscf_algorithm == 'Simple') then
     call simple_tcscf()
+  else
+    print *, ' not implemented yet', tcscf_algorithm
+    stop
   endif
 
   call minimize_tc_orb_angles()
@@ -127,7 +130,7 @@ subroutine simple_tcscf()
       it += 1
       if(it > n_it_tcscf_max) then
         print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
-        exit
+        stop
       endif
 
 
@@ -190,7 +193,7 @@ subroutine simple_tcscf()
 
   endif
 
-  print*,'Energy converged !'
+  print *, ' TCSCF Simple converged !'
   call print_energy_and_mos()
 
   deallocate(rho_old, rho_new)
diff --git a/src/utils/integration.irp.f b/src/utils/integration.irp.f
index f593cefb..5079daa7 100644
--- a/src/utils/integration.irp.f
+++ b/src/utils/integration.irp.f
@@ -48,7 +48,7 @@ end
 
 
 ! TODO remove dim
-subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,beta,a,b,A_center,B_center,dim)
+subroutine give_explicit_poly_and_gaussian(P_new, P_center, p, fact_k, iorder, alpha, beta, a, b, A_center, B_center, dim)
 
   BEGIN_DOC
   ! Transforms the product of
@@ -65,19 +65,19 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
 
   implicit none
   include 'constants.include.F'
-  integer, intent(in)            :: dim
-  integer, intent(in)            :: a(3),b(3)         ! powers : (x-xa)**a_x = (x-A(1))**a(1)
-  double precision, intent(in)   :: alpha, beta       ! exponents
-  double precision, intent(in)   :: A_center(3)       ! A center
-  double precision, intent(in)   :: B_center (3)      ! B center
-  double precision, intent(out)  :: P_center(3)       ! new center
-  double precision, intent(out)  :: p                 ! new exponent
-  double precision, intent(out)  :: fact_k            ! constant factor
-  double precision, intent(out)  :: P_new(0:max_dim,3)! polynomial
-  integer, intent(out)           :: iorder(3)         ! i_order(i) = order of the polynomials
+  integer,          intent(in)  :: dim
+  integer,          intent(in)  :: a(3), b(3)        ! powers : (x-xa)**a_x = (x-A(1))**a(1)
+  double precision, intent(in)  :: alpha, beta       ! exponents
+  double precision, intent(in)  :: A_center(3)       ! A center
+  double precision, intent(in)  :: B_center (3)      ! B center
+  integer,          intent(out) :: iorder(3)         ! i_order(i) = order of the polynomials
+  double precision, intent(out) :: P_center(3)       ! new center
+  double precision, intent(out) :: p                 ! new exponent
+  double precision, intent(out) :: fact_k            ! constant factor
+  double precision, intent(out) :: P_new(0:max_dim,3)! polynomial
 
-  double precision               :: P_a(0:max_dim,3), P_b(0:max_dim,3)
-  integer                        :: n_new,i,j
+  integer                       :: n_new, i, j
+  double precision              :: P_a(0:max_dim,3), P_b(0:max_dim,3)
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: P_a, P_b
 
   iorder(1) = 0
@@ -87,46 +87,46 @@ subroutine give_explicit_poly_and_gaussian(P_new,P_center,p,fact_k,iorder,alpha,
   P_new(0,2) = 0.d0
   P_new(0,3) = 0.d0
   !DIR$ FORCEINLINE
-  call gaussian_product(alpha,A_center,beta,B_center,fact_k,p,P_center)
-  if (fact_k < thresh) then
+  call gaussian_product(alpha, A_center, beta, B_center, fact_k, p, P_center)
+  if(fact_k < thresh) then
     ! IF fact_k is too smal then:
     ! returns a "s" function centered in zero
     ! with an inifinite exponent and a zero polynom coef
     P_center = 0.d0
-    p = 1.d+15
-    fact_k = 0.d0
+    p        = 1.d+15
+    fact_k   = 0.d0
     return
   endif
 
   !DIR$ FORCEINLINE
-  call recentered_poly2(P_a(0,1),A_center(1),P_center(1),a(1),P_b(0,1),B_center(1),P_center(1),b(1))
+  call recentered_poly2(P_a(0,1), A_center(1), P_center(1), a(1), P_b(0,1), B_center(1), P_center(1), b(1))
   iorder(1) = a(1) + b(1)
-  do i=0,iorder(1)
+  do i = 0, iorder(1)
     P_new(i,1) = 0.d0
   enddo
-  n_new=0
+  n_new = 0
   !DIR$ FORCEINLINE
-  call multiply_poly(P_a(0,1),a(1),P_b(0,1),b(1),P_new(0,1),n_new)
+  call multiply_poly(P_a(0,1), a(1), P_b(0,1), b(1), P_new(0,1), n_new)
 
   !DIR$ FORCEINLINE
-  call recentered_poly2(P_a(0,2),A_center(2),P_center(2),a(2),P_b(0,2),B_center(2),P_center(2),b(2))
+  call recentered_poly2(P_a(0,2), A_center(2), P_center(2), a(2), P_b(0,2), B_center(2), P_center(2), b(2))
   iorder(2) = a(2) + b(2)
-  do i=0,iorder(2)
+  do i = 0, iorder(2)
     P_new(i,2) = 0.d0
   enddo
-  n_new=0
+  n_new = 0
   !DIR$ FORCEINLINE
-  call multiply_poly(P_a(0,2),a(2),P_b(0,2),b(2),P_new(0,2),n_new)
+  call multiply_poly(P_a(0,2), a(2), P_b(0,2), b(2), P_new(0,2), n_new)
 
   !DIR$ FORCEINLINE
-  call recentered_poly2(P_a(0,3),A_center(3),P_center(3),a(3),P_b(0,3),B_center(3),P_center(3),b(3))
+  call recentered_poly2(P_a(0,3), A_center(3), P_center(3), a(3), P_b(0,3), B_center(3), P_center(3), b(3))
   iorder(3) = a(3) + b(3)
-  do i=0,iorder(3)
+  do i = 0, iorder(3)
     P_new(i,3) = 0.d0
   enddo
-  n_new=0
+  n_new = 0
   !DIR$ FORCEINLINE
-  call multiply_poly(P_a(0,3),a(3),P_b(0,3),b(3),P_new(0,3),n_new)
+  call multiply_poly(P_a(0,3), a(3), P_b(0,3), b(3), P_new(0,3), n_new)
 
 end
 
@@ -167,26 +167,33 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, io
 
   call gaussian_product_v(alpha, A_center, LD_A, beta, B_center, fact_k, p, P_center, n_points)
 
-  if ( ior(ior(b(1),b(2)),b(3)) == 0 ) then  ! b == (0,0,0)
-
-    lda = maxval(a)
-    ldb = 0
-    allocate(P_a(n_points,0:lda,3), P_b(n_points,0:0,3))
-
-    call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points)
+  if(ior(ior(b(1), b(2)), b(3)) == 0) then  ! b == (0,0,0)
 
     iorder(1:3) = a(1:3)
+
+    lda = maxval(a)
+    allocate(P_a(n_points,0:lda,3))
+    !ldb = 0
+    !allocate(P_b(n_points,0:0,3))
+
+    !call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, P_b, B_center, P_center, n_points)
+    call recentered_poly2_v0(P_a, lda, A_center, LD_A, P_center, a, n_points)
+
     do ipoint = 1, n_points
       do xyz = 1, 3
-        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
         do i = 1, a(xyz)
-          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
         enddo
       enddo
     enddo
 
-    return
+    deallocate(P_a)
+    !deallocate(P_b)
 
+    return
   endif
 
   lda = maxval(a)
@@ -198,20 +205,27 @@ subroutine give_explicit_poly_and_gaussian_v(P_new, ldp, P_center, p, fact_k, io
   iorder(1:3) = a(1:3) + b(1:3)
 
   do xyz = 1, 3
-    if (b(xyz) == 0) then
+    if(b(xyz) == 0) then
+
       do ipoint = 1, n_points
-        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        !P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz) * P_b(ipoint,0,xyz)
+        P_new(ipoint,0,xyz) = P_a(ipoint,0,xyz)
         do i = 1, a(xyz)
-          P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          !P_new(ipoint,i,xyz) = P_new(ipoint,i,xyz) + P_b(ipoint,0,xyz) * P_a(ipoint,i,xyz)
+          P_new(ipoint,i,xyz) = P_a(ipoint,i,xyz)
         enddo
       enddo
+
     else
+
       do i = 0, iorder(xyz)
         do ipoint = 1, n_points
           P_new(ipoint,i,xyz) = 0.d0
         enddo
       enddo
+
       call multiply_poly_v(P_a(1,0,xyz), a(xyz), P_b(1,0,xyz), b(xyz), P_new(1,0,xyz), ldp, n_points)
+
     endif
   enddo
 
@@ -720,45 +734,57 @@ end subroutine recentered_poly2_v
 
 ! ---
 
-subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q, n_points)
+!subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q, n_points)
+subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, n_points)
 
   BEGIN_DOC
+  ! 
   ! Recenter two polynomials. Special case for b=(0,0,0)
+  ! 
+  ! (x - A)^a (x - B)^0 = (x - P + P - A)^a  (x - Q + Q - B)^0
+  !                     = (x - P + P - A)^a 
+  !
   END_DOC
 
   implicit none
   integer,          intent(in)  :: a(3), n_points, lda, LD_xA
-  double precision, intent(in)  :: x_A(LD_xA,3) 
-  double precision, intent(in)  :: x_B(3)
-  double precision, intent(in)  :: x_P(n_points,3), x_Q(n_points,3)
-  double precision, intent(out) :: P_new(n_points,0:lda,3), P_new2(n_points,3)
+  double precision, intent(in)  :: x_A(LD_xA,3), x_P(n_points,3)
+  !double precision, intent(in)  :: x_B(3), x_Q(n_points,3)
+  double precision, intent(out) :: P_new(n_points,0:lda,3)
+  !double precision, intent(out) :: P_new2(n_points,3)
+
   integer                       :: i, j, k, l, xyz, ipoint, maxab(3)
   double precision              :: fa
-  double precision, allocatable :: pows_a(:,:), pows_b(:,:)
+  double precision, allocatable :: pows_a(:,:)
+  !double precision, allocatable :: pows_b(:,:)
 
   double precision              :: binom_func
 
-  maxab(1:3) = max(a(1:3),(/0,0,0/))
+  maxab(1:3) = max(a(1:3), (/0,0,0/))
 
-  allocate( pows_a(n_points,-2:maxval(maxab)+4), pows_b(n_points,-2:maxval(maxab)+4) )
+  allocate(pows_a(n_points,-2:maxval(maxab)+4))
+  !allocate(pows_b(n_points,-2:maxval(maxab)+4))
 
   do xyz = 1, 3
-    if (a(xyz)<0) cycle
-    do ipoint=1,n_points
+    if(a(xyz) < 0) cycle
+
+    do ipoint = 1, n_points
       pows_a(ipoint,0) = 1.d0
       pows_a(ipoint,1) = (x_P(ipoint,xyz) - x_A(ipoint,xyz))
-      pows_b(ipoint,0) = 1.d0
-      pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
+      !pows_b(ipoint,0) = 1.d0
+      !pows_b(ipoint,1) = (x_Q(ipoint,xyz) - x_B(xyz))
     enddo
-    do i = 2,maxab(xyz)
-      do ipoint=1,n_points
-        pows_a(ipoint,i) = pows_a(ipoint,i-1)*pows_a(ipoint,1)
-        pows_b(ipoint,i) = pows_b(ipoint,i-1)*pows_b(ipoint,1)
+
+    do i = 2, maxab(xyz)
+      do ipoint = 1, n_points
+        pows_a(ipoint,i) = pows_a(ipoint,i-1) * pows_a(ipoint,1)
+        !pows_b(ipoint,i) = pows_b(ipoint,i-1) * pows_b(ipoint,1)
       enddo
     enddo
-    do ipoint=1,n_points
+
+    do ipoint = 1, n_points
       P_new (ipoint,0,xyz) =  pows_a(ipoint,a(xyz))
-      P_new2(ipoint,xyz) =  pows_b(ipoint,0)
+      !P_new2(ipoint,xyz)   =  pows_b(ipoint,0)
     enddo
     do i = 1, min(a(xyz), 20)
       fa = binom_transp(a(xyz)-i, a(xyz))
@@ -775,11 +801,12 @@ subroutine recentered_poly2_v0(P_new, lda, x_A, LD_xA, x_P, a, P_new2, x_B, x_Q,
 
   enddo !xyz
 
-  deallocate(pows_a, pows_b)
+  deallocate(pows_a)
+  !deallocate(pows_b)
 
 end subroutine recentered_poly2_v0
 
-!--
+! ---
 
 subroutine pol_modif_center(A_center, B_center, iorder, A_pol, B_pol)
 
diff --git a/src/utils/one_e_integration.irp.f b/src/utils/one_e_integration.irp.f
index c797c87e..cf417613 100644
--- a/src/utils/one_e_integration.irp.f
+++ b/src/utils/one_e_integration.irp.f
@@ -31,7 +31,10 @@ double precision function overlap_gaussian_x(A_center,B_center,alpha,beta,power_
   overlap_gaussian_x*= fact_p
 end
 
+! ---
 
+! TODO
+! gaussian_product is called twice: in give_explicit_poly_and_gaussian and here
 subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_B, overlap_x, overlap_y, overlap_z, overlap, dim)
 
   BEGIN_DOC
@@ -45,51 +48,50 @@ subroutine overlap_gaussian_xyz(A_center, B_center, alpha, beta, power_A, power_
   include 'constants.include.F'
 
   implicit none
-  integer,intent(in)             :: dim ! dimension maximum for the arrays representing the polynomials
-  double precision,intent(in)    :: A_center(3),B_center(3)  ! center of the x1 functions
-  double precision, intent(in)   :: alpha,beta
-  integer,intent(in)             :: power_A(3), power_B(3) ! power of the x1 functions
-  double precision, intent(out)  :: overlap_x,overlap_y,overlap_z,overlap
-  double precision               :: P_new(0:max_dim,3),P_center(3),fact_p,p
-  double precision               :: F_integral_tab(0:max_dim)
-  integer                        :: iorder_p(3)
-  integer                        :: nmax
-  double precision               :: F_integral
+  integer,          intent(in)  :: dim                       ! dimension maximum for the arrays representing the polynomials
+  integer,          intent(in)  :: power_A(3), power_B(3)    ! power of the x1 functions
+  double precision, intent(in)  :: A_center(3), B_center(3)  ! center of the x1 functions
+  double precision, intent(in)  :: alpha, beta
+  double precision, intent(out) :: overlap_x, overlap_y, overlap_z, overlap
+  integer                       :: i, nmax, iorder_p(3)
+  double precision              :: P_new(0:max_dim,3), P_center(3), fact_p, p
+  double precision              :: F_integral_tab(0:max_dim)
+
+  double precision              :: F_integral
 
   call give_explicit_poly_and_gaussian(P_new, P_center, p, fact_p, iorder_p, alpha, beta, power_A, power_B, A_center, B_center, dim)
-  if(fact_p.lt.1d-20)then
+  if(fact_p .lt. 1d-20) then
     overlap_x = 1.d-10
     overlap_y = 1.d-10
     overlap_z = 1.d-10
-    overlap = 1.d-10
+    overlap   = 1.d-10
     return
   endif
 
   nmax = maxval(iorder_p)
-  do i = 0,nmax
-    F_integral_tab(i) = F_integral(i,p)
+  do i = 0, nmax
+    F_integral_tab(i) = F_integral(i, p)
   enddo
   overlap_x = P_new(0,1) * F_integral_tab(0)
   overlap_y = P_new(0,2) * F_integral_tab(0)
   overlap_z = P_new(0,3) * F_integral_tab(0)
 
-  integer                        :: i
   do i = 1,iorder_p(1)
     overlap_x = overlap_x + P_new(i,1) * F_integral_tab(i)
   enddo
-  call gaussian_product_x(alpha,A_center(1),beta,B_center(1),fact_p,p,P_center(1))
+  call gaussian_product_x(alpha, A_center(1), beta, B_center(1), fact_p, p, P_center(1))
   overlap_x *= fact_p
 
-  do i = 1,iorder_p(2)
+  do i = 1, iorder_p(2)
     overlap_y = overlap_y + P_new(i,2) * F_integral_tab(i)
   enddo
-  call gaussian_product_x(alpha,A_center(2),beta,B_center(2),fact_p,p,P_center(2))
+  call gaussian_product_x(alpha, A_center(2), beta, B_center(2), fact_p, p, P_center(2))
   overlap_y *= fact_p
 
   do i = 1,iorder_p(3)
     overlap_z = overlap_z + P_new(i,3) * F_integral_tab(i)
   enddo
-  call gaussian_product_x(alpha,A_center(3),beta,B_center(3),fact_p,p,P_center(3))
+  call gaussian_product_x(alpha, A_center(3), beta, B_center(3), fact_p, p, P_center(3))
   overlap_z *= fact_p
 
   overlap = overlap_x * overlap_y * overlap_z
@@ -183,7 +185,7 @@ subroutine overlap_gaussian_xyz_v(A_center, B_center, alpha, beta, power_A, powe
   double precision              :: F_integral
   double precision, allocatable :: P_new(:,:,:), P_center(:,:), fact_p(:)
 
-  ldp = maxval( power_A(1:3) + power_B(1:3) )
+  ldp = maxval(power_A(1:3) + power_B(1:3))
 
   allocate(P_new(n_points,0:ldp,3), P_center(n_points,3), fact_p(n_points))
 

From 5358bd9a612cca00ac969b5d2bb67038159a4e01 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sat, 10 Dec 2022 15:23:50 +0100
Subject: [PATCH 09/68] working on the grad squared

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 428 +++++++++++++-----
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |  14 +-
 .../grad_lapl_jmu_manu.irp.f                  |  12 +-
 .../grad_lapl_jmu_modif.irp.f                 |   6 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  90 ++--
 src/non_h_ints_mu/grad_squared.irp.f          |   1 +
 src/non_h_ints_mu/grad_squared_manu.irp.f     | 134 ++++++
 src/non_h_ints_mu/new_grad_tc.irp.f           |   2 +
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      | 146 ++++++
 src/tc_scf/test_int.irp.f                     | 287 ++++++++++--
 10 files changed, 921 insertions(+), 199 deletions(-)
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index c25d8055..7367df88 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -1,110 +1,4 @@
 
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
-  !
-  END_DOC
-
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
-  double precision              :: coef, beta, B_center(3), dist
-  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
-  double precision              :: wall0, wall1
-  double precision, external    :: NAI_pol_mult_erf_ao_with1s
-  double precision :: j12_mu_r12,int_j1b
-  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
-  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
-
-  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
-  call wall_time(wall0)
-
-
-  int2_u_grad1u_j1b2_test = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
- !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr, &
- !$OMP          final_grid_points, ng_fit_jast,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
- !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
- !$OMP DO
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
-        r(1) = final_grid_points(1,ipoint)
-        r(2) = final_grid_points(2,ipoint)
-        r(3) = final_grid_points(3,ipoint)
-
-        tmp = 0.d0
-        do i_1s = 1, List_comb_b3_size_thr(j,i)
-
-          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
-          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
-          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
-          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
-          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
-          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
-
-          do i_fit = 1, ng_fit_jast
-
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
-
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
-
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            if(expo_coef_1s .gt. 20.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-08) cycle
-
-            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
-
-            tmp += coef_tmp * int_fit
-          enddo
-        enddo
-
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
-
-END_PROVIDER
-
-! ---
-
 BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num, ao_num, n_points_final_grid)]
 
   BEGIN_DOC
@@ -133,7 +27,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s)                &
-     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_b3_size_thr,&
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,&
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
@@ -150,7 +44,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          cycle
        endif
   
-       do i_1s = 1, List_comb_b3_size_thr(j,i)
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -222,7 +116,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
-     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b3_size_thr,&
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
@@ -238,7 +132,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
        cycle
      endif
 
-      do i_1s = 1, List_comb_b3_size_thr(j,i)
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
 
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
@@ -282,3 +176,317 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int2_u2_j1b2_test(j,i,ipoint) =
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  ! with r1 = final_grid_points(:,ipoint)
+  END_DOC
+
+  implicit none
+  integer                    :: ipoint, i, j, m_1s, m_fit
+  double precision           :: r1(3), ctr_1s(3), acc_ij
+  double precision           :: c_1s, a_1s, w_j1b
+  double precision           :: c_fit, a_fit, ovl_fit, t_start, t_stop
+
+  double precision, external :: overlap_gauss_r12_ao
+  double precision, external :: overlap_gauss_r12_ao_with1s
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(t_start)
+
+  int2_u2_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, m_1s, m_fit, r1, c_1s, a_1s, ctr_1s,  &
+ !$OMP          c_fit, a_fit, ovl_fit, acc_ij, w_j1b)               &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       &
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r1(1:3) = final_grid_points(1:3,ipoint)
+
+    ! only the entries whose first index is >= the second one are evaluated
+    ! inside the parallel region; the remaining ones are copied after it
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        acc_ij = 0.d0
+
+        do m_1s = 1, List_comb_thr_b3_size(j,i)
+
+          c_1s  = List_comb_thr_b3_coef(m_1s,j,i)
+          a_1s  = List_comb_thr_b3_expo(m_1s,j,i)
+          w_j1b = ao_abs_comb_b3_j1b   (m_1s,j,i)
+
+          ! screening: drop the 1s term when |coef| * |ao_abs_comb_b3_j1b| < 1.d-10
+          if(dabs(c_1s) * dabs(w_j1b) < 1.d-10) cycle
+
+          ctr_1s(1:3) = List_comb_thr_b3_cent(1:3,m_1s,j,i)
+
+          ! accumulate the Gaussian fit, one overlap integral per fit function
+          do m_fit = 1, ng_fit_jast
+
+            a_fit   = expo_gauss_j_mu_x_2(m_fit)
+            c_fit   = coef_gauss_j_mu_x_2(m_fit)
+            ovl_fit = overlap_gauss_r12_ao_with1s(ctr_1s, a_1s, r1, a_fit, i, j)
+
+            acc_ij += c_1s * c_fit * ovl_fit
+
+          enddo
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = acc_ij
+
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! fill the entries whose first index is smaller than the second one
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num - 1
+      do i = j + 1, ao_num
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(t_stop)
+  print*, ' wall time for int2_u2_j1b2_test', t_stop - t_start
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int2_u_grad1u_x_j1b2_test(:,j,i,ipoint) =
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  ! with r1 = final_grid_points(:,ipoint)
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     &
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b)                        &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    ! only j >= i is computed here; the other entries are copied after the
+    ! parallel region
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          ! squared distance between the 1s center and the grid point: it does
+          ! not depend on i_fit, so it is evaluated once per 1s term
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            ! product of the Gaussian of exponent expo_fit centered at r with
+            ! the one of exponent beta centered at B_center: exponent alpha_1s,
+            ! center centr_1s, prefactor dexp(-expo_coef_1s)
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+
+            ! skip fit terms with a negligible overall coefficient
+            if(dabs(coef_tmp) .lt. 1d-10) cycle
+
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = tmp_x
+        int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = tmp_y
+        int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! copy the computed entries onto the ones skipped in the parallel region
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(1,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(1,i,j,ipoint)
+        int2_u_grad1u_x_j1b2_test(2,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(2,i,j,ipoint)
+        int2_u_grad1u_x_j1b2_test(3,j,i,ipoint) = int2_u_grad1u_x_j1b2_test(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! int2_u_grad1u_j1b2_test(j,i,ipoint) =
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! with r1 = final_grid_points(:,ipoint)
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision              :: int_j1b, dsqpi_3_2
+  double precision              :: beta_ij, center_ij_1s(3), factor_ij_1s
+  ! pi^(3/2); the exponent has to be a real literal because the integer ratio 3/2 is 1
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            ! NOTE(review): the outputs of this call only feed the disabled test below
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index 872bfaef..5cd2aac6 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -51,7 +51,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += -0.25d0 * coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -241,7 +241,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
           tmp_x += coef_fit * int_fit(1)
           tmp_y += coef_fit * int_fit(2)
           tmp_z += coef_fit * int_fit(3)
-          if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
 
           ! ---
 
@@ -265,7 +265,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -351,7 +351,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-!          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +375,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-!            if(expo_coef_1s .gt. 80.d0) cycle
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-!            if(dabs(coef_tmp) .lt. 1d-10) cycle
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
 
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 1b457d68..382f6351 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -25,7 +25,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
 
  !$OMP PARALLEL DEFAULT (NONE)                                                         &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points, &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
  !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
@@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
         if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
 
         tmp = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
@@ -129,7 +129,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
  !$OMP          int_j1b, tmp_x, tmp_y, tmp_z)                                                  & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_b2_size_thr, final_grid_points,&
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
@@ -147,7 +147,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
         tmp_x = 0.d0
         tmp_y = 0.d0
         tmp_z = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
@@ -223,7 +223,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
  !$OMP SHARED  (n_points_final_grid, ao_num,  & 
  !$OMP          final_grid_points, ng_fit_jast,                  &
  !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
- !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_b2_size_thr,       & 
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
  !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
  !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
  !$OMP DO
@@ -238,7 +238,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
         if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
 
         tmp = 0.d0
-        do i_1s = 1, List_comb_b2_size_thr(j,i)
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
 
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
index 6a662533..8fff961b 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -49,7 +49,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
 
         int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
         int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-        if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
 
         tmp += coef * (int_mu - int_coulomb)
 
@@ -169,7 +169,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
-        if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
 
         tmp_x += coef * (ints(1) - ints_coulomb(1))
         tmp_y += coef * (ints(2) - ints_coulomb(2))
@@ -277,7 +277,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
           B_center(3) = List_all_comb_b2_cent(3,1)
 
           int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
 
           tmp += coef * coef_fit * int_fit
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 606664f8..934ccab1 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -1,70 +1,70 @@
 
- BEGIN_PROVIDER [ integer, List_comb_b2_size_thr, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ integer, max_List_comb_b2_size_thr]
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
  implicit none
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
- List_comb_b2_size_thr = 0
+ thr = 1.d-12
+ List_comb_thr_b2_size = 0
  do i = 1, ao_num
   do j = i, ao_num
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-10)cycle
+     if(dabs(coef).lt.1.d-12)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
-     beta = max(beta,1.d-10)
+     beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
       int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
-      List_comb_b2_size_thr(j,i) += 1
+      List_comb_thr_b2_size(j,i) += 1
      endif
    enddo
   enddo 
  enddo
  do i = 1, ao_num
   do j = 1, i-1
-    List_comb_b2_size_thr(j,i) = List_comb_b2_size_thr(i,j)
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
   enddo
  enddo
  integer :: list(ao_num)
  do i = 1, ao_num
-  list(i) = maxval(List_comb_b2_size_thr(:,i))
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
  enddo
- max_List_comb_b2_size_thr = maxval(list) 
+ max_List_comb_thr_b2_size = maxval(list) 
  
 END_PROVIDER 
 
- BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_b2_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_b2_size_thr ,ao_num, ao_num)]
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
  implicit none
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
+ thr = 1.d-12
  ao_abs_comb_b2_j1b = 10000000.d0
  do i = 1, ao_num
   do j = i, ao_num
    icount = 0
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-10)cycle
+     if(dabs(coef).lt.1.d-12)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
@@ -83,7 +83,7 @@ END_PROVIDER
 
  do i = 1, ao_num
   do j = 1, i-1
-    do icount = 1, List_comb_b2_size_thr(j,i)
+    do icount = 1, List_comb_thr_b2_size(j,i)
      List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
      List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
      List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
@@ -94,14 +94,14 @@ END_PROVIDER
 END_PROVIDER 
 
 
- BEGIN_PROVIDER [ integer, List_comb_b3_size_thr, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ integer, max_List_comb_b3_size_thr]
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
  implicit none
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
- List_comb_b3_size_thr = 0
+ thr = 1.d-12
+ List_comb_thr_b3_size = 0
  do i = 1, ao_num
   do j = 1, ao_num
    do i_1s = 1, List_all_comb_b3_size
@@ -110,43 +110,43 @@ END_PROVIDER
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
       int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
-      List_comb_b3_size_thr(j,i) += 1
+      List_comb_thr_b3_size(j,i) += 1
      endif
    enddo
   enddo 
  enddo
 ! do i = 1, ao_num
 !  do j = 1, i-1
-!    List_comb_b3_size_thr(j,i) = List_comb_b3_size_thr(i,j)
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
 !  enddo
 ! enddo
  integer :: list(ao_num)
  do i = 1, ao_num
-  list(i) = maxval(List_comb_b3_size_thr(:,i))
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
  enddo
- max_List_comb_b3_size_thr = maxval(list) 
- print*,'max_List_comb_b3_size_thr =  ',max_List_comb_b3_size_thr
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
  
 END_PROVIDER 
 
- BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_b3_size_thr,ao_num, ao_num )]
-&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_b3_size_thr ,ao_num, ao_num)]
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
  implicit none
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-10
+ thr = 1.d-12
  ao_abs_comb_b3_j1b = 10000000.d0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -154,13 +154,13 @@ END_PROVIDER
    do i_1s = 1, List_all_comb_b3_size
      coef        = List_all_comb_b3_coef  (i_1s)
      beta        = List_all_comb_b3_expo  (i_1s)
-     beta = max(beta,1.d-10)
+     beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
      if(dabs(coef).lt.thr)cycle
      int_j1b = 0.d0
-     do ipoint = 1, n_points_final_grid
-      r(1:3) = final_grid_points(1:3,ipoint)
-      weight = final_weight_at_r_vector(ipoint)
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
@@ -179,7 +179,7 @@ END_PROVIDER
 
 ! do i = 1, ao_num
 !  do j = 1, i-1
-!    do icount = 1, List_comb_b3_size_thr(j,i)
+!    do icount = 1, List_comb_thr_b3_size(j,i)
 !     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
 !     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
 !     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..c941b427 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -290,6 +290,7 @@ BEGIN_PROVIDER [ double precision, u12sq_j1bsq, (ao_num, ao_num, n_points_final_
 
 END_PROVIDER 
 
+! ---
 ! ---
 
 BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num, n_points_final_grid) ]
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..c4189535
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,134 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! tc_grad_square_ao_test(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  ! NOTE(review): the second term is presumably |\grad_2 u(r1,r2)|^2 -- to be confirmed
+  ! built from u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test and grad12_j12
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: wgt_pt, wphi_i, wphi_ik
+  double precision, allocatable :: acc_sq(:,:,:,:), acc_g12(:,:,:,:)
+
+  allocate(acc_sq(ao_num,ao_num,ao_num,ao_num), acc_g12(ao_num,ao_num,ao_num,ao_num))
+  acc_sq  = 0.d0
+  acc_g12 = 0.d0
+
+  ! sum over the final grid: the (k,i) AO pair carries the grid weight
+  do ipoint = 1, n_points_final_grid
+    wgt_pt = final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      wphi_i = wgt_pt * aos_in_r_array_transp(ipoint,i)
+
+      do k = 1, ao_num
+        wphi_ik = wphi_i * aos_in_r_array_transp(ipoint,k)
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+            acc_sq(k,i,l,j)  += wphi_ik * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
+            acc_g12(k,i,l,j) += wphi_ik * grad12_j12(l,j,ipoint)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! acc_sq enters twice, the second time with the (k,i) and (l,j) pairs exchanged
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = acc_sq(k,i,l,j) + acc_sq(l,j,k,i) + acc_g12(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(acc_sq, acc_g12)
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+
+  BEGIN_DOC
+  ! u12sq_j1bsq_test(i,j,ipoint) = -1/2 |v_1b_grad(:,ipoint)|^2 x int2_u2_j1b2_test(i,j,ipoint)
+  END_DOC
+
+  implicit none
+  integer          :: ipoint
+  double precision :: vgx, vgy, vgz, fac_pt
+  double precision :: t_begin, t_end
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(t_begin)
+
+  do ipoint = 1, n_points_final_grid
+    vgx    = v_1b_grad(1,ipoint)
+    vgy    = v_1b_grad(2,ipoint)
+    vgz    = v_1b_grad(3,ipoint)
+    fac_pt = -0.5d0 * (vgx * vgx + vgy * vgy + vgz * vgz)
+    ! the same prefactor multiplies every AO pair at this grid point
+    u12sq_j1bsq_test(1:ao_num,1:ao_num,ipoint) = fac_pt * int2_u2_j1b2_test(1:ao_num,1:ao_num,ipoint)
+  enddo
+
+  call wall_time(t_end)
+  print*, ' Wall time for u12sq_j1bsq_test = ', t_end - t_begin
+
+END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+  BEGIN_DOC
+  ! Combination of v_1b, v_1b_grad and r1 with int2_u_grad1u_j1b2_test and int2_u_grad1u_x_j1b2_test
+  END_DOC
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! screened (_test) integral, consistent with the _test integrals used below
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(3,i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..e05492f0 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -82,6 +82,7 @@ END_PROVIDER
 
 ! ---
 
+
 BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
@@ -149,3 +150,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..79d845e8
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,146 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int2_grad1_u12_ao_test(:,i,j,ipoint) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
+  !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,i,j,ipoint) - tmp2 * tmp_x
+          int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,i,j,ipoint) - tmp2 * tmp_y
+          int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,i,j,ipoint) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao_test(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
+          int2_grad1_u12_ao_test(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
+          int2_grad1_u12_ao_test(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao_test *= 0.5d0
+
+  endif
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
+  double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision, allocatable :: ac_mat(:,:,:,:)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
+  ac_mat = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+    do i = 1, ao_num
+      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+
+      do k = 1, ao_num
+        ao_k_r = aos_in_r_array_transp(ipoint,k)
+
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+
+        do j = 1, ao_num
+          do l = 1, ao_num
+
+            contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint) * tmp_x 
+            contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint) * tmp_y 
+            contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint) * tmp_z 
+
+            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ 
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+END_PROVIDER 
+
+! ---
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 6961d2f0..545dec48 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -9,20 +9,37 @@ program test_ints
   print *, 'starting ...'
 
   my_grid_becke  = .True.
-!  my_n_pt_r_grid = 30
+  my_n_pt_r_grid = 10
 !  my_n_pt_a_grid = 50
-  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 26 ! small grid for quick debug
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
- call routine_int2_u_grad1u_j1b2 
- call routine_v_ij_erf_rk_cst_mu_j1b
- call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
- call routine_v_ij_u_cst_mu_j1b
 
-!
-! call routine_test_j1b
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
 
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
 ! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+ call test_total_grad_square
+
 end
 
 subroutine routine_test_j1b
@@ -42,7 +59,7 @@ subroutine routine_test_j1b
  print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
  do i = 1, ao_num
   do j = 1, ao_num
-   do icount = 1, List_comb_b3_size_thr(j,i)
+   do icount = 1, List_comb_thr_b3_size(j,i)
     print*,'',j,i
     print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
     print*,List_comb_thr_b3_cent(1:3,icount,j,i)
@@ -51,7 +68,7 @@ subroutine routine_test_j1b
 !   enddo
   enddo
  enddo
- print*,'max_List_comb_b3_size_thr = ',max_List_comb_b3_size_thr,List_all_comb_b3_size
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
 
 end
 
@@ -221,7 +238,7 @@ end
 
 
 
-subroutine routine_v_ij_u_cst_mu_j1b
+subroutine routine_v_ij_u_cst_mu_j1b_test
  implicit none
  integer :: i,j,ipoint,k,l
  double precision :: weight,accu_relat, accu_abs, contrib
@@ -286,13 +303,13 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
  double precision, allocatable :: ints(:,:,:)
  allocate(ints(ao_num, ao_num, n_points_final_grid))
- do ipoint = 1, n_points_final_grid
-  do i = 1, ao_num
-   do j = 1, ao_num
-    read(33,*)ints(j,i,ipoint)
-   enddo
-  enddo
- enddo
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -306,18 +323,18 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
      do j = 1, ao_num
       array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!     !array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
-       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
-        print*,j,i,ipoint
-        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
 !        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
-        stop
-       endif
-      endif
+!        stop
+!       endif
+!      endif
      enddo
     enddo
    enddo
@@ -343,4 +360,218 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
 
   
 
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(m,j,i,ipoint) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2(m,j,i,ipoint)      * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
 end

From ce79917152da002f47ab2052cd9b85646d610aec Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sat, 10 Dec 2022 17:40:17 +0100
Subject: [PATCH 10/68] gain a factor 27 on tc_scf

---
 src/non_h_ints_mu/grad_squared_manu.irp.f | 54 ++++++++++++++++++++++-
 src/non_h_ints_mu/total_tc_int.irp.f      | 47 +++++++++++++++-----
 src/tc_keywords/EZFIO.cfg                 |  6 +++
 src/tc_scf/test_int.irp.f                 | 17 +++++--
 4 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index c4189535..ada174e5 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -29,7 +29,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
         do j = 1, ao_num
           do l = 1, ao_num
             ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
-            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12(l,j,ipoint)
+            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12_test(l,j,ipoint)
           enddo
         enddo
       enddo
@@ -132,3 +132,55 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+ 
+  implicit none
+  integer                    :: ipoint, i, j, m, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 979296d1..bdd5e5ac 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -8,16 +8,20 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao
   double precision :: wall1, wall0
 
   call wall_time(wall0)
-
-  do j = 1, ao_num
-    do l = 1, ao_num
-      do i = 1, ao_num
-        do k = 1, ao_num
-          ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
-        enddo
-      enddo
-    enddo
-  enddo
+  
+  if(test_cycle_tc)then
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
 
   call wall_time(wall1)
   print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
@@ -26,6 +30,29 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: wall1, wall0
+
+  call wall_time(wall0)
+
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist_test(k,i,l,j) = tc_grad_square_ao_test(k,i,l,j) + tc_grad_and_lapl_ao_test(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  call wall_time(wall1)
+  print *, ' wall time for ao_tc_int_chemist_test ', wall1 - wall0
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
 
   BEGIN_DOC
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index a206dfa9..0cbdb753 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -166,3 +166,9 @@ doc: Thresholds on the Imag part of energy
 interface: ezfio,provider,ocaml
 default: 1.e-7
 
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: False
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 545dec48..1947bb92 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -9,10 +9,10 @@ program test_ints
   print *, 'starting ...'
 
   my_grid_becke  = .True.
-  my_n_pt_r_grid = 10
-!  my_n_pt_a_grid = 50
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
 !  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 14 ! small grid for quick debug
+!  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
   my_n_pt_r_extra_grid = 30
@@ -38,10 +38,19 @@ program test_ints
 ! call routine_int2_grad1u2_grad2u2_j1b2
 ! call routine_int2_u_grad1u_j1b2
 ! call test_total_grad_lapl
- call test_total_grad_square
+! call test_total_grad_square
+ call test_ao_tc_int_chemist
 
 end
 
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
 subroutine routine_test_j1b
  implicit none
  integer :: i,icount,j

From 23ec3ba18af62b13e97c0fecc4e7b06a30a208e1 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Mon, 12 Dec 2022 16:49:31 +0100
Subject: [PATCH 11/68] trying to prune grid points per AO couples

---
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    | 47 +++++++++
 .../grad_lapl_jmu_manu.irp.f                  | 99 ++++++++++++++++++-
 src/ao_many_one_e_ints/list_grid.irp.f        | 59 +++++++++++
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  2 +-
 .../prim_int_gauss_gauss.irp.f                | 68 ++++++++++++-
 src/ao_tc_eff_map/fit_j.irp.f                 | 25 +++++
 src/bi_ort_ints/semi_num_ints_mo.irp.f        | 30 ++++--
 src/dft_utils_in_r/ao_in_r.irp.f              | 41 ++++++++
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      | 32 +++---
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  2 +-
 src/tc_scf/test_int.irp.f                     | 53 +++++++++-
 11 files changed, 424 insertions(+), 34 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f

diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..ad215b41 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -156,6 +156,53 @@ end function overlap_gauss_r12_ao
 
 ! --
 
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+  BEGIN_DOC
+  ! Sum over primitive pairs (l of AO i, k of AO j) of | c_l c_k overlap_abs_gauss_r12(D_center, delta, A, B, ..., alpha_l, beta_k) |.
+  ! Presumably an absolute-value bound on \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2} -- TODO confirm against overlap_abs_gauss_r12.
+  END_DOC
+
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+  ! AO pairs whose absolute overlap is below 1.d-12 keep the zero result set above
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+
+      if(dabs(coef) .lt. 1d-12) cycle
+
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_abs_gauss_r12_ao
+
+! --
+
 subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
 
   BEGIN_DOC
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 382f6351..c7a171f8 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -94,9 +94,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_nu
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
       do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(1,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(2,j,i,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b_test(3,j,i,ipoint)
       enddo
     enddo
   enddo
@@ -285,3 +285,96 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  !
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0) ! pi^(3/2); the integer quotient 3/2 evaluates to 1 and would give pi
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..ccdc33ad
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,dist,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11 ! read-only in the parallel region below, hence SHARED (a PRIVATE copy would be undefined)
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, fact_gauss, alpha, center,dist,sigma,center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl,thr)
+ !$OMP DO
+ do i = 1, ao_num
+! do i = 3,3
+  do j = 1, ao_num
+! do i = 22,22
+!  do j = 9,9
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+!   if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    dist  = (center_ij(1) - r(1))*(center_ij(1) - r(1))
+    dist += (center_ij(2) - r(2))*(center_ij(2) - r(2))
+    dist += (center_ij(3) - r(3))*(center_ij(3) - r(3))
+    dist = dsqrt(dist)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+!    print*,''
+!    print*,j,i,ao_overlap_abs_grid(j,i),ao_overlap_abs(j,i)
+!    print*,r
+!    print*,dist,sigma
+!    print*,fact_gauss
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+!    print*,overlap
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+ ! max_n_pts_grid_ao_prod = largest number of retained grid points over all AO pairs (j,i)
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list) 
+END_PROVIDER 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 934ccab1..9481d363 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -116,7 +116,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_thr_b3_size(j,i) += 1
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..dcd1db66 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -26,14 +26,16 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   dim1=100
   thr = 1.d-10
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
   call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
-    coefx = A_new(lx,1)
+    coefx = A_new(lx,1)*fact_a_new
     if(dabs(coefx).lt.thr)cycle
     iorder_tmp(1) = lx
     do ly = 0, iorder_a_new(2)
@@ -51,7 +53,69 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
       enddo
     enddo
   enddo
-  overlap_gauss_r12 = fact_a_new * accu
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
+  implicit none
+  include 'constants.include.F'
+  double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in)   :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)            :: power_A(3),power_B(3)
+
+  double precision               :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision               :: A_new(0:max_dim,3)! new polynom
+  double precision               :: A_center_new(3)   ! new center
+  integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision               :: alpha_new         ! new exponent
+  double precision               :: fact_a_new        ! constant factor
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
+  integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+    do ly = 0, iorder_a_new(2)
+      coefy = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy).lt.thr)cycle
+      iorder_tmp(2) = ly
+      do lz = 0, iorder_a_new(3)
+        coefz = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz).lt.thr)cycle
+        iorder_tmp(3) = lz
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
+      enddo
+    enddo
+  enddo
+  overlap_abs_gauss_r12= accu
 end
 
 !---
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index 8fad9079..d861054e 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -1,5 +1,30 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Upper bound long range fit of F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2) 
+ !
+ ! with a single gaussian. 
+ !
+ ! Such a function can be used to screen integrals with F(x). 
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
 ! ---
 
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! exponent of Gaussian in order to obtain an upper bound of J(r12,mu)
+ !
+ ! Can be used to screen integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
  implicit none
  BEGIN_DOC
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 4762c25e..746593dc 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -108,15 +108,27 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   double precision :: wall0, wall1
 
   call wall_time(wall0)
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(1,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(2,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(3,j,i,ipoint)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
+       enddo
+     enddo
+   enddo
+  endif
   call wall_time(wall1)
   print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
 
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 6fa6a4c7..72f820ec 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -40,6 +40,47 @@
  END_PROVIDER
 
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) 
+  enddo
+ enddo
+
+ END_PROVIDER
+
+
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
index 1af34d74..9393668f 100644
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -38,15 +38,15 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
    enddo
   enddo
  enddo
- do i = 1, ao_num
-  do j = 1, ao_num
-   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-    do m = 1, 3
-     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-    enddo
-   endif
-  enddo
- enddo
+! do i = 1, ao_num
+!  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+!    do m = 1, 3
+!     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+!    enddo
+!   endif
+!  enddo
+! enddo
 
 END_PROVIDER 
 
@@ -76,13 +76,13 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
   enddo
  enddo
 
- do i = 1, ao_num
-  do j = 1, ao_num
-   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-   endif
-  enddo
- enddo
+! do i = 1, ao_num
+!  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+!     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+!   endif
+!  enddo
+! enddo
 
 END_PROVIDER 
 
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index ada174e5..9b4cbfbd 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -116,7 +116,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao
     do j = 1, ao_num
       do i = 1, ao_num
 
-        tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
 
         u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(1,i,j,ipoint) &
                                                      + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(2,i,j,ipoint) &
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 1947bb92..f3a396be 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -14,7 +14,7 @@ program test_ints
 !  my_n_pt_r_grid = 10 ! small grid for quick debug
 !  my_n_pt_a_grid = 14 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-
+  my_extra_grid_becke = .True.
   my_n_pt_r_extra_grid = 30
   my_n_pt_a_extra_grid = 50 ! small extra_grid for quick debug
   touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
@@ -39,7 +39,8 @@ program test_ints
 ! call routine_int2_u_grad1u_j1b2
 ! call test_total_grad_lapl
 ! call test_total_grad_square
- call test_ao_tc_int_chemist
+! call test_ao_tc_int_chemist
+ call test_grid_points_ao
 
 end
 
@@ -584,3 +585,51 @@ subroutine test_total_grad_square
 
 
 end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(1,j,i,ipoint)) & 
+! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) &
+! + dabs(int2_u_grad1u_x_j1b2_test(2,j,i,ipoint)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end

From a34653b5d19d05407eac1459c540c2625085260e Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 13 Dec 2022 18:40:37 +0100
Subject: [PATCH 12/68] improved the building of tc_grad_and_lapl_ao_test and
 #tc_grad_square_ao_test

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 63 +++++++--------
 .../grad_lapl_jmu_manu.irp.f                  |  8 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   | 12 +--
 src/non_h_ints_mu/grad_squared_manu.irp.f     | 27 ++++---
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      | 46 ++++++-----
 src/tc_scf/test_int.irp.f                     | 78 ++++++++++++-------
 6 files changed, 138 insertions(+), 96 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 7367df88..6f7b29d2 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -1,5 +1,5 @@
 
-BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
 
   BEGIN_DOC
   !
@@ -16,13 +16,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
 
   double precision, allocatable :: int_fit_v(:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),int_j1b,int_gauss,dsqpi_3_2
+  double precision :: int_gauss,dsqpi_3_2,int_j1b
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
   call wall_time(wall0)
 
- int2_grad1u2_grad2u2_j1b2_test_no_v(:,:,:) = 0.d0
+ int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
@@ -31,7 +32,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test_no_v, ao_abs_comb_b3_j1b,&
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
      !$OMP          ao_overlap_abs,dsqpi_3_2)
  !$OMP DO SCHEDULE(dynamic)
  do ipoint = 1, n_points_final_grid
@@ -49,7 +50,6 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -57,15 +57,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
          do i_fit = 1, ng_fit_jast
   
            expo_fit = expo_gauss_1_erf_x_2(i_fit)
-!           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
-!           if(dabs(coef_fit)*factor_ij_1s*dabs(int_j1b).lt.1.d-15)cycle
+           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
            int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
   
-           int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) += coef_fit * int_gauss 
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
   
          enddo
         enddo
@@ -79,17 +80,17 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_no_v, (ao_num,
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
       do j = 1, i-1
-        int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint)
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_no_v', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
 !
 !  BEGIN_DOC
 !  !
@@ -104,15 +105,15 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
   double precision              :: tmp
   double precision              :: wall0, wall1
 
-  double precision, allocatable :: int_fit_v(:)
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
   provide mu_erf final_grid_points_transp j1b_pen
   call wall_time(wall0)
 
  double precision :: int_j1b
- int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
-!
+ allocate(big_array(n_points_final_grid,ao_num, ao_num)) ! allocate before first use: big_array is an allocatable local
+ big_array(:,:,:) = 0.d0 ! zero the accumulator that the OpenMP loop below fills with +=
  !$OMP PARALLEL DEFAULT (NONE)                                       &
      !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
      !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
@@ -120,7 +121,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      !$OMP          final_grid_points_transp, ng_fit_jast,               &
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
-     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test,&
+     !$OMP          List_comb_thr_b3_cent, big_array,&
      !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
 !
  allocate(int_fit_v(n_points_final_grid))
@@ -151,7 +152,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
                expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
 
          do ipoint = 1, n_points_final_grid
-           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_fit_v(ipoint)
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
          enddo
 
        enddo
@@ -162,17 +163,24 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
  !$OMP END DO
  deallocate(int_fit_v)
  !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
 
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
 
 END_PROVIDER
 
@@ -192,6 +200,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
 
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
@@ -200,7 +209,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b)          & 
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
@@ -231,6 +240,9 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
           
             expo_fit = expo_gauss_j_mu_x_2(i_fit)
             coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle
           
             ! ---
           
@@ -323,17 +335,6 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
             expo_fit = expo_gauss_j_mu_1_erf(i_fit)
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
     
-            ! ---
-    
-!            call NAI_pol_x_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r, int_fit)
-!            tmp_x += coef_fit * int_fit(1)
-!            tmp_y += coef_fit * int_fit(2)
-!            tmp_z += coef_fit * int_fit(3)
-!            if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
-
-          ! ---
-
-
             dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
                         + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
                         + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
@@ -347,7 +348,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index c7a171f8..13ca41f2 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -119,7 +119,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
   double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
-  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s(3) ! center_ij_1s: 3-vector, as in the other providers
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
   call wall_time(wall0)
@@ -128,7 +128,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
- !$OMP          int_j1b, tmp_x, tmp_y, tmp_z)                                                  & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)                                                  & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
@@ -157,6 +157,10 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
 
+          ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2)
+!          !DIR$ FORCEINLINE
+!          call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!          if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle 
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index 9481d363..d22f1ac8 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -5,13 +5,13 @@
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  List_comb_thr_b2_size = 0
  do i = 1, ao_num
   do j = i, ao_num
    do i_1s = 1, List_all_comb_b2_size
      coef        = List_all_comb_b2_coef  (i_1s)
-     if(dabs(coef).lt.1.d-12)cycle
+     if(dabs(coef).lt.1.d-15)cycle
      beta        = List_all_comb_b2_expo  (i_1s)
      beta = max(beta,1.d-12)
      center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
@@ -51,7 +51,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  ao_abs_comb_b2_j1b = 10000000.d0
  do i = 1, ao_num
   do j = i, ao_num
@@ -100,7 +100,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  List_comb_thr_b3_size = 0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -146,7 +146,7 @@ END_PROVIDER
  integer :: i_1s,i,j,ipoint,icount
  double precision :: coef,beta,center(3),int_j1b,thr
  double precision :: r(3),weight,dist
- thr = 1.d-12
+ thr = 1.d-15
  ao_abs_comb_b3_j1b = 10000000.d0
  do i = 1, ao_num
   do j = 1, ao_num
@@ -164,7 +164,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index 9b4cbfbd..e1fc4d86 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -9,8 +9,11 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
 
   implicit none
   integer                       :: ipoint, i, j, k, l
-  double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: weight1, ao_ik_r, ao_i_r,contrib,contrib2
   double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
+  double precision :: wall1, wall0
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+  call wall_time(wall0)
 
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
@@ -20,16 +23,18 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
   do ipoint = 1, n_points_final_grid
     weight1 = final_weight_at_r_vector(ipoint)
 
-    do i = 1, ao_num
-      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+    do j = 1, ao_num
+      do l = 1, ao_num
+        contrib =  u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) 
+        contrib2=grad12_j12_test(l,j,ipoint)
+        do i = 1, ao_num
+          ao_i_r = weight1 * aos_in_r_array(i,ipoint)
+        
+          do k = 1, ao_num
+            ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
 
-      do k = 1, ao_num
-        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
-
-        do j = 1, ao_num
-          do l = 1, ao_num
-            ac_mat(k,i,l,j) += ao_ik_r * ( u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) )
-            bc_mat(k,i,l,j) += ao_ik_r * grad12_j12_test(l,j,ipoint)
+            ac_mat(k,i,l,j) += ao_ik_r * contrib
+            bc_mat(k,i,l,j) += ao_ik_r * contrib2
           enddo
         enddo
       enddo
@@ -45,6 +50,8 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_nu
       enddo
     enddo
   enddo
+  call wall_time(wall1)
+  print*,'wall time for tc_grad_square_ao_test',wall1 - wall0
 
   deallocate(ac_mat)
   deallocate(bc_mat)
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
index 79d845e8..cceb0991 100644
--- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -95,34 +95,38 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
   double precision, allocatable :: ac_mat(:,:,:,:)
+  double precision :: wall0, wall1
 
+  provide int2_grad1_u12_ao_test 
+  call wall_time(wall0)
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+    do j = 1, ao_num
+      do l = 1, ao_num
+       contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint)
+       contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint)
+       contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint)
+       do i = 1, ao_num
+         ao_i_r  = weight1 * aos_in_r_array                (i,ipoint)
+         ao_i_dx = weight1 * aos_grad_in_r_array_transp(1,i,ipoint)
+         ao_i_dy = weight1 * aos_grad_in_r_array_transp(2,i,ipoint)
+         ao_i_dz = weight1 * aos_grad_in_r_array_transp(3,i,ipoint)
+       
+         do k = 1, ao_num
+           ao_k_r = aos_in_r_array(k,ipoint)
+       
+           tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp(1,k,ipoint) 
+           tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp(2,k,ipoint) 
+           tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp(3,k,ipoint) 
 
-    do i = 1, ao_num
-      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
-      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
-      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
-      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+           tmp_x *= contrib_x 
+           tmp_y *= contrib_y 
+           tmp_z *= contrib_z 
 
-      do k = 1, ao_num
-        ao_k_r = aos_in_r_array_transp(ipoint,k)
-
-        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
-        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
-        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
-
-        do j = 1, ao_num
-          do l = 1, ao_num
-
-            contrib_x = int2_grad1_u12_ao_test(1,l,j,ipoint) * tmp_x 
-            contrib_y = int2_grad1_u12_ao_test(2,l,j,ipoint) * tmp_y 
-            contrib_z = int2_grad1_u12_ao_test(3,l,j,ipoint) * tmp_z 
-
-            ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+            ac_mat(k,i,l,j) += tmp_x + tmp_y + tmp_z
           enddo
         enddo
       enddo
@@ -139,6 +143,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_
     enddo
   enddo
 
+  call wall_time(wall1)
+  print*,'wall time for tc_grad_and_lapl_ao_test',wall1 - wall0
   deallocate(ac_mat)
 
 END_PROVIDER 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index f3a396be..d0217423 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -11,8 +11,8 @@ program test_ints
   my_grid_becke  = .True.
   my_n_pt_r_grid = 30
   my_n_pt_a_grid = 50
-!  my_n_pt_r_grid = 10 ! small grid for quick debug
-!  my_n_pt_a_grid = 14 ! small grid for quick debug
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
   my_extra_grid_becke = .True.
   my_n_pt_r_extra_grid = 30
@@ -40,10 +40,21 @@ program test_ints
 ! call test_total_grad_lapl
 ! call test_total_grad_square
 ! call test_ao_tc_int_chemist
- call test_grid_points_ao
+! call test_grid_points_ao
+ call test_tc_scf
 
 end
 
+subroutine test_tc_scf
+ implicit none
+! provide tc_grad_square_ao_test
+  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
 subroutine test_ao_tc_int_chemist
  implicit none
  provide ao_tc_int_chemist
@@ -309,17 +320,18 @@ end
 subroutine routine_int2_grad1u2_grad2u2_j1b2
  implicit none
  integer :: i,j,ipoint,k,l
+ integer :: ii , jj
  double precision :: weight,accu_relat, accu_abs, contrib
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
  double precision, allocatable :: ints(:,:,:)
  allocate(ints(ao_num, ao_num, n_points_final_grid))
-! do ipoint = 1, n_points_final_grid
-!  do i = 1, ao_num
-!   do j = 1, ao_num
-!    read(33,*)ints(j,i,ipoint)
-!   enddo
-!  enddo
-! enddo
+ do ipoint = 1, n_points_final_grid
+  do i = 1, ao_num
+   do j = 1, ao_num
+    read(33,*)ints(j,i,ipoint)
+   enddo
+  enddo
+ enddo
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -331,17 +343,17 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
    do l = 1, ao_num
     do i = 1, ao_num
      do j = 1, ao_num
-      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !      array_ref(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !     !array(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
-!     !array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!       array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
 !      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
-!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
 !        print*,j,i,ipoint
-!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(j,i,ipoint))
-!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test_no_v(i,j,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
 !        stop
 !       endif
 !      endif
@@ -350,23 +362,35 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
    enddo
   enddo
  enddo
+ double precision :: e_ref, e_new
  accu_relat = 0.d0
  accu_abs   = 0.d0
-  do k = 1, ao_num
-   do l = 1, ao_num
-    do i = 1, ao_num
-     do j = 1, ao_num
-      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
-      accu_abs += contrib
-      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
-       accu_relat += contrib/dabs(array_ref(j,i,l,k))
-      endif
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
      enddo
     enddo
    enddo
+
   enddo
- print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
- print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
 
   
 

From 0b7c5fcb977592c6807eb80fe2cddcdd852e40db Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 14 Dec 2022 18:47:32 +0100
Subject: [PATCH 13/68] trying to optimize the most intensive part

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 31 ++++---
 .../grad_lapl_jmu_manu.irp.f                  | 21 +++--
 src/ao_tc_eff_map/fit_j.irp.f                 | 10 +++
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      | 83 ++++++++++++++-----
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  3 +-
 src/tc_scf/test_int.irp.f                     | 49 ++++++++++-
 6 files changed, 151 insertions(+), 46 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index 6f7b29d2..d5210aa7 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -17,8 +17,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
   double precision, allocatable :: int_fit_v(:)
   double precision, external    :: overlap_gauss_r12_ao_with1s
   double precision :: int_gauss,dsqpi_3_2,int_j1b
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
-  dsqpi_3_2 = (dacos(-1.d0))**(3/2)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2 
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
   call wall_time(wall0)
@@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
      !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
-     !$OMP          ao_overlap_abs,dsqpi_3_2)
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
  !$OMP DO SCHEDULE(dynamic)
  do ipoint = 1, n_points_final_grid
    r(1) = final_grid_points(1,ipoint)
@@ -60,7 +60,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
            !DIR$ FORCEINLINE
            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
-           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
@@ -200,7 +201,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
-  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3)
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+  sq_pi_3_2 = (dacos(-1.d0))**(3/2)
 
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
@@ -213,7 +215,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
  !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
  !$OMP DO
   do ipoint = 1, n_points_final_grid
@@ -242,7 +244,8 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
             coef_fit = coef_gauss_j_mu_x_2(i_fit)
             !DIR$ FORCEINLINE
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
           
             ! ---
           
@@ -291,8 +294,8 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
   double precision :: coef, beta, B_center(3), dist
   double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
   double precision :: tmp_x, tmp_y, tmp_z, int_j1b
-  double precision :: wall0, wall1
-
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+  sq_pi_3_2 = dacos(-1.D0)**(3/2)
   provide mu_erf final_grid_points j1b_pen
   call wall_time(wall0)
 
@@ -302,12 +305,12 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
  !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
  !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z,int_j1b)                        & 
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
  !$OMP          final_grid_points, ng_fit_jast,                     &
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
- !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
  !$OMP DO
 
   do ipoint = 1, n_points_final_grid
@@ -348,7 +351,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -450,7 +455,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
 
             expo_fit = expo_gauss_j_mu_1_erf(i_fit)
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-3/2).lt.1.d-15)cycle
             coef_fit = coef_gauss_j_mu_1_erf(i_fit)
 
             alpha_1s     = beta + expo_fit
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index 13ca41f2..f71a66e6 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -50,7 +50,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
-
+          ! TODO :: cycle on the 1 - erf(mur12)
           int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
           int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
 
@@ -122,6 +122,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
   double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s
   dsqpi_3_2 = (dacos(-1.d0))**(3/2)
 
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
   call wall_time(wall0)
 
   x_v_ij_erf_rk_cst_mu_tmp_j1b_test = 0.d0
@@ -132,9 +133,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
  !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
  !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
- !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
  !$OMP DO
-  !do ipoint = 1, 10
   do ipoint = 1, n_points_final_grid
     r(1) = final_grid_points(1,ipoint)
     r(2) = final_grid_points(2,ipoint)
@@ -142,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
 
         tmp_x = 0.d0
         tmp_y = 0.d0
@@ -157,10 +158,14 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
 
-          ! approximate 1 - erf(mu r12) = exp(-2 mu r12^2)
-!          !DIR$ FORCEINLINE
-!          call gaussian_product(expo_good_j_mu_1gauss,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
-!          if(dabs(coef * factor_ij_1s*int_j1b).lt.1.d-10)cycle 
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-3/2)).lt.1.d-10)cycle 
+!          endif
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
           call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index d861054e..902d4514 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -13,6 +13,16 @@
 END_PROVIDER 
 ! ---
 
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0 ! hard-coded; NOTE(review): presumably the exponent a of a Gaussian model exp(-a x**2) of 1 - erf(x) -- confirm origin
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf ! expo_erfc_gauss scaled by mu_erf**2; presumably the exponent in r12 for 1 - erf(mu_erf r12) -- TODO confirm
+END_PROVIDER 
+
  BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
 &BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
index 9393668f..39ea0cdf 100644
--- a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -1,5 +1,28 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+!
+! Evaluated as the sum over the final grid points of |aos_in_r_array(i,ipoint)| * final_weight_at_r_vector(ipoint)
+ END_DOC
+ integer :: i,ipoint
+ double precision :: weight
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    ao_abs_int_grid(i) += dabs(aos_in_r_array(i,ipoint)) * weight
+  enddo
+ enddo
+
+END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
  implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
  integer :: i,j,ipoint
  double precision :: contrib, weight,r(3)
  ao_overlap_abs_grid = 0.D0
@@ -21,7 +44,7 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
  BEGIN_DOC
 ! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
 !
-! if \int |phi_i(r) phi_j(r)| < 1.d-15 then ao_prod_center = 0.
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
  END_DOC
  integer :: i,j,m,ipoint
  double precision :: contrib, weight,r(3)
@@ -38,26 +61,29 @@ BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
    enddo
   enddo
  enddo
-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!    do m = 1, 3
-!     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!    enddo
-!   endif
-!  enddo
-! enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
 
 END_PROVIDER 
 
-BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
  implicit none
  BEGIN_DOC
-! ao_prod_sigma(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
+! ao_prod_abs_r(i,j) = \int |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 +(z - <|i|z|j|>)^2) / \int |phi_i(r) phi_j(r)|
 !
-! gives you a precise idea of the spatial extension of the distribution phi_i(r) phi_j(r)
  END_DOC
- ao_prod_sigma = 0.d0
+ ao_prod_abs_r = 0.d0
  integer :: i,j,m,ipoint
  double precision :: contrib, weight,r(3),contrib_x2
  do ipoint = 1,n_points_final_grid 
@@ -71,21 +97,34 @@ BEGIN_PROVIDER [ double precision, ao_prod_sigma, (ao_num, ao_num)]
      contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
     enddo
     contrib_x2 = dsqrt(contrib_x2)
-    ao_prod_sigma(j,i) += contrib * contrib_x2
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
    enddo
   enddo
  enddo
 
-! do i = 1, ao_num
-!  do j = 1, ao_num
-!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
-!     ao_prod_sigma(j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
-!   endif
-!  enddo
-! enddo
 
 END_PROVIDER 
 
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_sigma(j,i) = (2 ao_overlap_abs_grid(j,i) / ao_prod_abs_r(j,i))**2 / pi
+!
+! Used as the exponent of one Gaussian modelling |chi_i(r) chi_j(r)| \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: icol,jrow
+ double precision :: inv_pi,ratio
+ inv_pi = 1.d0/dacos(-1.d0)
+ do icol = 1, ao_num
+  do jrow = 1, ao_num
+   ! every pair is filled: the 1.d-5 overlap guard that once wrapped this assignment is disabled
+   ratio = 2.d0*ao_overlap_abs_grid(jrow,icol)/ao_prod_abs_r(jrow,icol)
+   ao_prod_sigma(jrow,icol) = inv_pi * ratio**2
+   ! NOTE(review): a vanishing ao_prod_abs_r(j,i) is not guarded against here -- confirm callers skip such pairs
+  enddo
+ enddo
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
  implicit none
  BEGIN_DOC
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
index e1fc4d86..14749082 100644
--- a/src/non_h_ints_mu/grad_squared_manu.irp.f
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -99,6 +99,7 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao
   double precision           :: time0, time1
   double precision, external :: overlap_gauss_r12_ao
 
+  provide int2_u_grad1u_x_j1b2_test
   print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
   call wall_time(time0)
 
@@ -147,7 +148,7 @@ BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_fi
   double precision           :: tmp1
   double precision           :: time0, time1
   double precision, external :: overlap_gauss_r12_ao
-
+  provide int2_grad1u2_grad2u2_j1b2_test
   print*, ' providing grad12_j12_test ...'
   call wall_time(time0)
 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index d0217423..a81b09d5 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -41,14 +41,21 @@ program test_ints
 ! call test_total_grad_square
 ! call test_ao_tc_int_chemist
 ! call test_grid_points_ao
- call test_tc_scf
+! call test_tc_scf
+ call test_int_gauss
 
 end
 
 subroutine test_tc_scf
  implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
 ! provide tc_grad_square_ao_test
-  provide tc_grad_and_lapl_ao_test
+!  provide tc_grad_and_lapl_ao_test
 ! provide int2_u_grad1u_x_j1b2_test
 ! provide x_v_ij_erf_rk_cst_mu_tmp_j1b_test
 ! print*,'TC_HF_energy = ',TC_HF_energy
@@ -657,3 +664,41 @@ subroutine test_grid_points_ao
   enddo
  enddo
 end
+
+subroutine test_int_gauss
+ implicit none
+ BEGIN_DOC
+! Prints ao_prod_sigma/ao_overlap_abs_grid/ao_prod_center per AO pair j >= i, then grid integrals of exp(-alpha r^2) and r exp(-alpha r^2) next to their analytic values
+ END_DOC
+ integer :: i,j
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  r(1:3) = final_grid_points(1:3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! add the contribution of this grid point to both quadratures
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5d0
+ print*,'integral_2      =',integral_2
+ print*,'2 pi/alpha**2   =',2.d0*pi / (alpha)**2
+
+
+end

From 3f0326e6c2ec666e5d6ed593531fc4406343c23a Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 16 Dec 2022 00:05:51 +0100
Subject: [PATCH 14/68] fixed stupid bug in
 ao_many_one_e_ints/listj1b_sorted.irp.f

---
 src/ao_many_one_e_ints/listj1b_sorted.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
index d22f1ac8..bf493fbb 100644
--- a/src/ao_many_one_e_ints/listj1b_sorted.irp.f
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -22,7 +22,7 @@
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       List_comb_thr_b2_size(j,i) += 1
@@ -68,7 +68,7 @@ END_PROVIDER
       dist  = ( center(1) - r(1) )*( center(1) - r(1) )
       dist += ( center(2) - r(2) )*( center(2) - r(2) )
       dist += ( center(3) - r(3) )*( center(3) - r(3) )
-      int_j1b += dabs(aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,j))*dexp(-beta*dist) * weight
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
      enddo
      if(dabs(coef)*dabs(int_j1b).gt.thr)then
       icount += 1

From a3bc5fd421f6225fadea961bc8452b00c3769967 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 20 Dec 2022 00:07:52 +0100
Subject: [PATCH 15/68] added the computation of 3-e term for energy in
 open-shell tc_scf

---
 src/tc_bi_ortho/test_tc_fock.irp.f | 82 ++++++++++++++++++++++++++++--
 src/tc_scf/fock_tc.irp.f           |  4 +-
 2 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index 26446daf..ebd43a7a 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -15,7 +15,8 @@ program test_tc_fock
   !call routine_2
 !  call routine_3()
 
- call test_3e
+! call test_3e
+ call routine_tot
 end
 
 ! ---
@@ -84,8 +85,8 @@ subroutine routine_3()
         print*, i, a
         stop
        endif
-       !print*, ' excited det'
-       !call debug_det(det_i, N_int)
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
  
        call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
@@ -116,3 +117,78 @@ subroutine routine_3()
 end subroutine routine_3
 
 ! ---
+subroutine routine_tot()
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+  ! Compares, in magnitude, the htilde_ij element returned by htilde_mu_mat_bi_ortho(det_i, ref_bitmask, ...)
+  ! with Fock_matrix_tc_mo_beta(a,i) for beta (s1 = 2) single excitations i -> a of ref_bitmask; prints the summed error.
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+  ! only s1 = 2 is exercised: the loop over both spins is commented out
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then ! -1 is treated as a failed excitation
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle ! negligible elements are not compared
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new)) ! magnitudes only: a pure sign difference is not counted
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_tot
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index c3642a7e..2a08e469 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -89,7 +89,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
 
     call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                           , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-    if(three_body_h_tc) then
+    if(three_body_h_tc.and.elec_alpha_num == elec_beta_num) then
       Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
     endif
 
@@ -116,7 +116,7 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
 
-    if(three_body_h_tc) then
+    if(three_body_h_tc.and.elec_alpha_num == elec_beta_num) then
       Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
     endif
 

From 76d502bd353feb37541a282d0583e665c2850677 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Wed, 21 Dec 2022 00:40:24 +0100
Subject: [PATCH 16/68] added UHF Fock matrices

---
 src/bi_ort_ints/semi_num_ints_mo.irp.f       |  21 +
 src/bi_ort_ints/three_body_ints_bi_ort.irp.f |  43 ++-
 src/hartree_fock/fock_matrix_hf.irp.f        |  25 +-
 src/non_h_ints_mu/new_grad_tc.irp.f          | 106 +++++-
 src/tc_keywords/EZFIO.cfg                    |   2 +-
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f        | 377 ++++++++++++++++++
 src/tc_scf/fock_tc.irp.f                     |  44 ++-
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f | 380 +++++++++++--------
 src/tc_scf/tc_scf.irp.f                      |  69 +++-
 src/tc_scf/test_int.irp.f                    | 185 ++++++++-
 10 files changed, 1058 insertions(+), 194 deletions(-)
 create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f

diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 4762c25e..89098676 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -170,6 +170,27 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3
  enddo
 END_PROVIDER 
 
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
+  implicit none
+  BEGIN_DOC
+  ! int2_grad1_u12_ao_t(ipoint,m,j,i) = int2_grad1_u12_ao(m,j,i,ipoint) : same values with the grid index first
+  END_DOC
+  integer :: i, j, ipoint
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num ! AO range: the last two dimensions of this provider are ao_num, not mo_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(1,j,i,ipoint)
+        int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(2,j,i,ipoint)
+        int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(3,j,i,ipoint)
+      enddo
+    enddo
+  enddo
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo, (3, mo_num, mo_num, n_points_final_grid)]
 
   BEGIN_DOC
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
index c1c27f06..48fa84f7 100644
--- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -15,7 +15,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
  character*(128)  :: name_file 
 
   three_body_ints_bi_ort = 0.d0
-  print*,'Providing the three_body_ints_bi_ort ...'
+  print *, ' Providing the three_body_ints_bi_ort ...'
   call wall_time(wall0)
   name_file = 'six_index_tensor'
 
@@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -104,12 +104,11 @@ end subroutine give_integrals_3_body_bi_ort
 
 ! ---
 
-
 subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -170,3 +169,39 @@ end subroutine give_integrals_3_body_bi_ort_old
 
 ! ---
 
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > evaluated in the ATOMIC ORBITAL basis by quadrature over the final grid
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+  ! each grid point adds three terms: an AO pair product times the dot product of the two other int2_grad1_u12_ao_t vectors
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+    ! AO pair (k,i) with vectors (n,m).(l,j), then pair (l,j) with (n,m).(k,i), then pair (n,m) with (l,j).(k,i)
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f
index d7d8fa7d..cb698fbb 100644
--- a/src/hartree_fock/fock_matrix_hf.irp.f
+++ b/src/hartree_fock/fock_matrix_hf.irp.f
@@ -1,12 +1,27 @@
+! ---
 
  BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha, (ao_num, ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta ,  (ao_num, ao_num) ]
- use map_module
- implicit none
+&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta , (ao_num, ao_num) ]
+
  BEGIN_DOC
- ! Alpha and Beta Fock matrices in AO basis set
+ !
+ ! 2-e part of alpha and beta Fock matrices (F^{a} & F^{b}) in AO basis set
+ ! 
+ ! F^{a} = h + G^{a}
+ ! F^{b} = h + G^{b}
+ ! 
+ ! where : 
+ !         G^{a} = J^{a} + J^{b} - K^{a} ==> G_{ij}^{a} = \sum_{k,l} [ P_{kl} (kl|ij) - P_{kl}^{a} (ki|lj) ]
+ !         G^{b} = J^{a} + J^{b} - K^{b} ==> G_{ij}^{b} = \sum_{k,l} [ P_{kl} (kl|ij) - P_{kl}^{b} (ki|lj) ]
+ ! 
+ ! and P_{kl} = P_{kl}^{a} + P_{kl}^{b} 
+ !
  END_DOC
 
+ use map_module
+
+ implicit none
+
  integer                        :: i,j,k,l,k1,r,s
  integer                        :: i0,j0,k0,l0
  integer*8                      :: p,q
@@ -153,6 +168,8 @@
 
 END_PROVIDER
 
+! ---
+
  BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]
 &BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta,  (ao_num, ao_num) ]
  implicit none
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..484e3850 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -82,11 +82,77 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r2 = r(ipoint) is the grid point (r1 is the integration variable)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(:,i,j,ipoint)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12              v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_tmp_j1b(:,i,j,ipoint) 
+  !                                 - v_1b_grad(:,ipoint) * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  ! The second form is used when j1b_type = 3; otherwise int1_grad2_u12_ao = -int2_grad1_u12_ao.
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+      ! per-point factors, hoisted out of the AO loops: 0.5 * v_1b and the three components of v_1b_grad
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao
+
+  endif
+
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
   !
   ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
   !
@@ -98,11 +164,14 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
   double precision, allocatable :: ac_mat(:,:,:,:)
 
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
+  ! ---
+
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
 
@@ -132,12 +201,47 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+
+  ! ---
+
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(1,k,i,ipoint) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(2,k,i,ipoint) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(3,k,i,ipoint) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
  
   do j = 1, ao_num
     do l = 1, ao_num
       do i = 1, ao_num
         do k = 1, ao_num
           tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao(k,i,l,j) = ac_mat(k,i,l,j)
         enddo
       enddo
     enddo
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index a206dfa9..45af9723 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -158,7 +158,7 @@ default: 0.
 type: character*(32)
 doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
 interface: ezfio,provider,ocaml
-default: DIIS
+default: Simple 
 
 [im_thresh_tcscf]
 type: Threshold
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..607140f9
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,377 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+  ! Part of the 3-e Fock term summed over i, j = 1..elec_beta_num; fock_3e_uhf_mo_a and fock_3e_uhf_mo_b start from it.
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+   
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+          ! I_bij_xyz holds give_integrals_3_body_bi_ort(b, i, j, x, y, z): the six orderings of (a, i, j) on the right
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+          ! weights: 4, 1, 1, -2, -2, -2, with an overall factor -0.5
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+  ! Starts from fock_3e_uhf_mo_cs and adds three (i,j) sums in which an index runs over o..elec_alpha_num.
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+  ! o = first orbital index above elec_beta_num
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! --- j = o..elec_alpha_num, i = 1..elec_beta_num ; weights 2, 1, 1, -1, -1, -2
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- j = 1..elec_beta_num, i = o..elec_alpha_num ; weights 2, 1, 1, -1, -2, -1
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- i and j both in o..elec_alpha_num ; weights 1, 1, 1, -1, -1, -1
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! fock_3e_uhf_mo_b(b,a) = fock_3e_uhf_mo_cs(b,a) plus three extra double sums over (i,j)
+  ! in which at least one index runs over o = elec_beta_num+1, ..., elec_alpha_num.
+  !
+  ! Only the permutations that enter each sum are requested from
+  ! give_integrals_3_body_bi_ort (I_bij_xyz = integral for (b, i, j, x, y, z)).
+  !
+  END_DOC
+
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! --- j = o..elec_alpha_num, i = 1..elec_beta_num ; weights 2 (aij), -1 (aji), -1 (iaj)
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! --- j = 1..elec_beta_num, i = o..elec_alpha_num ; weights 2 (aij), -1 (aji), -1 (jia)
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- i and j both in o..elec_alpha_num ; weights 1 (aij), -1 (aji)
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Alpha-spin 3-e Fock term in the AO basis: AO 3-body integrals contracted with TCSCF_bi_ort_dm_ao_alpha/beta.
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer          :: g, d, e, k, mu, nu
+  double precision :: dm_ge_a, dm_ge_b, dm_ge
+  double precision :: dm_dk_a, dm_dk_b, dm_dk
+  double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision :: ti, tf
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+  ! each g iteration updates every (mu,nu), so threads accumulate privately and are summed by the REDUCTION clause
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+ !$OMP DO REDUCTION(+:fock_3e_uhf_ao_a)
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+
+              fock_3e_uhf_ao_a(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                                 + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                                 + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                                 - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                                 - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Beta-spin 3-e Fock term in the AO basis: AO 3-body integrals contracted with TCSCF_bi_ort_dm_ao_alpha/beta.
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer          :: g, d, e, k, mu, nu
+  double precision :: dm_ge_a, dm_ge_b, dm_ge
+  double precision :: dm_dk_a, dm_dk_b, dm_dk
+  double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision :: ti, tf
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+  ! each g iteration updates every (mu,nu), so threads accumulate privately and are summed by the REDUCTION clause
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+ !$OMP DO REDUCTION(+:fock_3e_uhf_ao_b)
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+
+              fock_3e_uhf_ao_b(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                                 + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                                 + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                                 - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                                 - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index c3642a7e..5981791c 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -31,13 +31,22 @@
           density_b = TCSCF_density_matrix_ao_beta (l,j)
           density   = density_a + density_b                      
 
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !                                         rho_a(l,j) *      < l k| T | i j>
+          two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
           two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !                                         rho_b(l,j) *      < l k| T | i j>
+          !                                         rho_b(l,j) *      < k l| T | j i>
           two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
         enddo
@@ -84,13 +93,23 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
   END_DOC
 
   implicit none
+  double precision, allocatable :: tmp(:,:)
 
   if(bi_ortho) then
 
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_alpha
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_a
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
+    !deallocate(tmp)
+
     call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                           , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
     if(three_body_h_tc) then
-      Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
     endif
 
   else
@@ -110,14 +129,23 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
   END_DOC
 
   implicit none
+  double precision, allocatable :: tmp(:,:)
 
   if(bi_ortho) then
 
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
-                         , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_beta
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_b
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
+    !deallocate(tmp)
 
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
     if(three_body_h_tc) then
-      Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
     endif
 
   else
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
index b0345957..859c06ca 100644
--- a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -1,202 +1,266 @@
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_a_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  fock_a_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_b_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  fock_b_tot_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_cs_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = 1, elec_beta_num
-!      call contrib_3e_sss(a,i,j,k,contrib_sss)
-!      call contrib_3e_soo(a,i,j,k,contrib_soo)
-!      call contrib_3e_sos(a,i,j,k,contrib_sos)
-!      contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- ! negative terms :: exchange contrib
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      new  = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) & 
-             -1.5d0 * exch_13_int - exch_23_int
-      fock_cs_3e_bi_orth(a,i) += new
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  fock_cs_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+
+          !!call contrib_3e_sss(a,i,j,k,contrib_sss)
+          !!call contrib_3e_soo(a,i,j,k,contrib_soo)
+          !!call contrib_3e_sos(a,i,j,k,contrib_sos)
+          !!contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
+ 
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+
+          ! negative terms :: exchange contrib
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+
+          new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int
+
+          fock_cs_3e_bi_orth(a,i) += new
+        enddo
+      enddo
     enddo
-   enddo
-  
   enddo
- enddo
- fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
+ 
+  fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
 
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_a_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_beta_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) &
-      + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  fock_a_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+          
+          fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
+  fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss
- fock_a_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = 1, elec_alpha_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call contrib_3e_sss(a,i,j,k,contrib_sss)
-      fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss
+ 
+  fock_a_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+
+          fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-
-
-
+! ---
 
 BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
- double precision :: new
- fock_b_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
+  double precision :: new
+
+  fock_b_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+
+          fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
+  fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_soo
- fock_b_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_alpha_num
-      call contrib_3e_soo(a,i,j,k,contrib_soo)
-      fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo
+
+  fock_b_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+
+          fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-subroutine contrib_3e_sss(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
- call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- integral = direct_int + c_3_int + c_minus_3_int 
- ! negative terms :: exchange contrib
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
- call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
- integral += - exch_13_int - exch_23_int  - exch_12_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+ 
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+  integral = direct_int + c_3_int + c_minus_3_int 
+
+  ! negative terms :: exchange contrib
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+  integral += - exch_13_int - exch_23_int  - exch_12_int 
+
+  integral = -integral
+
 end
 
+! ---
+
 subroutine contrib_3e_soo(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_23_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
- integral = direct_int - exch_23_int 
- integral = -integral
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+  integral = direct_int - exch_23_int 
+
+  integral = -integral
+
 end
 
-subroutine contrib_3e_sos(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
- integral = direct_int - exch_13_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+  integral = direct_int - exch_13_int 
+
+  integral = -integral
+
 end
+
+! ---
+
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index c84c837f..5090cc97 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -145,8 +145,10 @@ subroutine simple_tcscf()
       endif
       e_delta = dabs(TC_HF_energy - e_save)
 
-      print *, ' delta E  = ', e_delta
-      print *, ' gradient = ', grad_non_hermit
+      print *, ' delta E           = ', e_delta
+      print *, ' gradient          = ', grad_non_hermit
+      print *, ' max TC DIIS error = ', maxval(abs(FQS_SQF_mo))
+
       !print *, ' gradient= ', grad_non_hermit_right
 
       !rho_new   = TCSCF_bi_ort_dm_ao
@@ -168,6 +170,8 @@ subroutine simple_tcscf()
       TOUCH mo_l_coef mo_r_coef
       call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
 
+      !call test_fock_3e_uhf_mo()
+
       print *, ' ***'
       print *, ''
 
@@ -202,3 +206,64 @@ end subroutine simple_tcscf
 
 ! ---
 
+subroutine test_fock_3e_uhf_mo()
+
+  ! Debug check: compares fock_3e_uhf_mo_a / fock_3e_uhf_mo_b element by element with
+  ! fock_a_tot_3e_bi_orth / fock_b_tot_3e_bi_orth and prints every mismatch above thr_ih.
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  ! elements differing by more than thr_ih are reported individually
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! --- alpha
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+      endif
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_a = ', diff_tot
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! --- beta
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+      endif
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_b = ', diff_tot
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+end subroutine test_fock_3e_uhf_mo
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index 6961d2f0..e12e629b 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -9,22 +9,29 @@ program test_ints
   print *, 'starting ...'
 
   my_grid_becke  = .True.
-!  my_n_pt_r_grid = 30
-!  my_n_pt_a_grid = 50
-  my_n_pt_r_grid = 10 ! small grid for quick debug
-  my_n_pt_a_grid = 26 ! small grid for quick debug
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+ ! my_n_pt_r_grid = 10 ! small grid for quick debug
+ ! my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
- call routine_int2_u_grad1u_j1b2 
- call routine_v_ij_erf_rk_cst_mu_j1b
- call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
- call routine_v_ij_u_cst_mu_j1b
+ !call routine_int2_u_grad1u_j1b2 
+ !call routine_v_ij_erf_rk_cst_mu_j1b
+ !call routine_x_v_ij_erf_rk_cst_mu_tmp_j1b
+ !call routine_v_ij_u_cst_mu_j1b
 
 !
 ! call routine_test_j1b
 
-! call routine_int2_grad1u2_grad2u2_j1b2
+ !call routine_int2_grad1u2_grad2u2_j1b2
+
+
+  !call test_fock_3e_uhf_ao()
+  call test_fock_3e_uhf_mo()
+
 end
 
+! ---
+
 subroutine routine_test_j1b
  implicit none
  integer :: i,icount,j
@@ -286,13 +293,13 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
  double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
  double precision, allocatable :: ints(:,:,:)
  allocate(ints(ao_num, ao_num, n_points_final_grid))
- do ipoint = 1, n_points_final_grid
-  do i = 1, ao_num
-   do j = 1, ao_num
-    read(33,*)ints(j,i,ipoint)
-   enddo
-  enddo
- enddo
+! do ipoint = 1, n_points_final_grid
+!  do i = 1, ao_num
+!   do j = 1, ao_num
+!    read(33,*)ints(j,i,ipoint)
+!   enddo
+!  enddo
+! enddo
 
  allocate(array(ao_num, ao_num, ao_num, ao_num))
  array = 0.d0
@@ -344,3 +351,149 @@ subroutine routine_int2_grad1u2_grad2u2_j1b2
   
 
 end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+
+  ! Debug check: transforms fock_3e_uhf_ao_a / fock_3e_uhf_ao_b with ao_to_mo_bi_ortho and
+  ! compares the result element by element with fock_a_tot_3e_bi_orth / fock_b_tot_3e_bi_orth.
+
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  ! elements differing by more than thr_ih are reported individually
+  thr_ih = 1d-7
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+
+  ! --- alpha
+
+  PROVIDE fock_3e_uhf_ao_a
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+      endif
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_a = ', diff_tot
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! --- beta
+
+  PROVIDE fock_3e_uhf_ao_b
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+      endif
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_b = ', diff_tot
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+end subroutine test_fock_3e_uhf_ao
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+
+  ! Debug check: compares fock_3e_uhf_mo_a / fock_3e_uhf_mo_b element by element with
+  ! fock_a_tot_3e_bi_orth / fock_b_tot_3e_bi_orth and prints every mismatch above thr_ih.
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  ! elements differing by more than thr_ih are reported individually
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! --- alpha
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+      endif
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_a = ', diff_tot
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! --- beta
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+      endif
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  ! relative error; left as an absolute sum when the reference is identically zero (avoids 0/0)
+  if(norm .gt. 0.d0) diff_tot = diff_tot / norm
+  print *, ' diff on F_b = ', diff_tot
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+
+
+
+

From f7e58e4a636af0ab066aa644a74ab56cb4de6041 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 21 Dec 2022 13:54:20 +0100
Subject: [PATCH 17/68] added print_hmat and sparse_mat

---
 src/determinants/filter_connected.irp.f |   1 -
 src/determinants/sparse_mat.irp.f       | 164 ++++++++++++++++++++++++
 src/tc_scf/fock_tc_mo_tot.irp.f         |  23 ++++
 src/tc_scf/fock_three.irp.f             |   2 +
 src/tools/print_hmat.irp.f              |  90 +++++++++++++
 5 files changed, 279 insertions(+), 1 deletion(-)
 create mode 100644 src/determinants/sparse_mat.irp.f
 create mode 100644 src/tools/print_hmat.irp.f

diff --git a/src/determinants/filter_connected.irp.f b/src/determinants/filter_connected.irp.f
index 6110eb89..2c9d7a49 100644
--- a/src/determinants/filter_connected.irp.f
+++ b/src/determinants/filter_connected.irp.f
@@ -96,7 +96,6 @@ subroutine filter_not_connected(key1,key2,Nint,sze,idx)
   idx(0) = l-1
 end
 
-
 subroutine filter_connected(key1,key2,Nint,sze,idx)
   use bitmasks
   implicit none
diff --git a/src/determinants/sparse_mat.irp.f b/src/determinants/sparse_mat.irp.f
new file mode 100644
index 00000000..889bbeba
--- /dev/null
+++ b/src/determinants/sparse_mat.irp.f
@@ -0,0 +1,164 @@
+  use bitmasks
+
+subroutine filter_connected_array(key1,key2,ld,Nint,sze,idx)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Filters out the determinants that are not connected by H
+  !
+  ! idx(1:idx(0)) contains the indices i (1 <= i <= sze) of the determinants
+  ! key1(:,:,i) that differ from key2 by at most 4 bits (both spins summed),
+  ! i.e. that can interact with key2 via the H operator.
+  !
+  ! idx(0) is the number of determinants of key1 that interact with key2
+  !
+  ! ld is the declared last dimension of key1; only key1(:,:,1:sze) is scanned
+  END_DOC
+  integer, intent(in)            :: Nint, ld,sze
+  integer(bit_kind), intent(in)  :: key1(Nint,2,ld)
+  integer(bit_kind), intent(in)  :: key2(Nint,2)
+  integer, intent(out)           :: idx(0:sze)
+
+  integer                        :: i,j,l
+  integer                        :: degree_x2
+
+  ASSERT (Nint > 0)
+  ASSERT (sze >= 0)
+
+  l=1
+
+  if (Nint==1) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = popcnt(    xor( key1(1,1,i), key2(1,1))) &
+                + popcnt(    xor( key1(1,2,i), key2(1,2)))
+      ! degree_x2 = number of differing bits; more than 4 is rejected in every branch
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else if (Nint==2) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 =  popcnt(xor( key1(1,1,i), key2(1,1))) +            &
+          popcnt(xor( key1(2,1,i), key2(2,1))) +                     &
+          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
+          popcnt(xor( key1(2,2,i), key2(2,2)))
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else if (Nint==3) then
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = popcnt(xor( key1(1,1,i), key2(1,1))) +             &
+          popcnt(xor( key1(1,2,i), key2(1,2))) +                     &
+          popcnt(xor( key1(2,1,i), key2(2,1))) +                     &
+          popcnt(xor( key1(2,2,i), key2(2,2))) +                     &
+          popcnt(xor( key1(3,1,i), key2(3,1))) +                     &
+          popcnt(xor( key1(3,2,i), key2(3,2)))
+      if (degree_x2 > 4) then
+        cycle
+      else
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  else
+
+    !DIR$ LOOP COUNT (1000)
+    do i=1,sze
+      degree_x2 = 0
+      !DIR$ LOOP COUNT MIN(4)
+      do j=1,Nint
+        degree_x2 = degree_x2+ popcnt(xor( key1(j,1,i), key2(j,1))) +&
+            popcnt(xor( key1(j,2,i), key2(j,2)))
+        if (degree_x2 > 4) then
+          exit
+        endif
+      enddo
+      if (degree_x2 <= 4) then  ! same acceptance threshold as the Nint = 1, 2, 3 branches
+        idx(l) = i
+        l = l+1
+      endif
+    enddo
+
+  endif
+  idx(0) = l-1
+  ! l-1 = number of indices stored in idx(1:)
+end
+
+
+ BEGIN_PROVIDER [ integer, n_sparse_mat]
+&BEGIN_PROVIDER [ integer, n_connected_per_det, (N_det)]
+&BEGIN_PROVIDER [ integer, n_max_connected_per_det]
+ implicit none
+ BEGIN_DOC
+! n_sparse_mat : sum of n_connected_per_det over all determinants
+!
+! n_connected_per_det(i) : number of determinants of psi_det_sorted kept by filter_connected_array for psi_det_sorted(:,:,i)
+!
+! n_max_connected_per_det : largest entry of n_connected_per_det
+ END_DOC
+ integer              :: idet
+ integer, allocatable :: kept(:)
+ allocate(kept(0:N_det))
+ do idet = 1, N_det
+  call filter_connected_array(psi_det_sorted,psi_det_sorted(1,1,idet),psi_det_size,N_int,N_det,kept)
+  n_connected_per_det(idet) = kept(0)
+ enddo
+ n_sparse_mat            = sum(n_connected_per_det)
+ n_max_connected_per_det = maxval(n_connected_per_det)
+ deallocate(kept)
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), connected_det_per_det, (N_int,2,n_max_connected_per_det,N_det)]
+&BEGIN_PROVIDER [ integer(bit_kind), list_connected_det_per_det, (n_max_connected_per_det,N_det)]
+ implicit none
+ BEGIN_DOC
+! connected_det_per_det(:,:,k,idet) : kth determinant kept by filter_connected_array for psi_det_sorted(:,:,idet)
+!
+! list_connected_det_per_det(k,idet) : index of that determinant in psi_det_sorted (only k <= kept count is set)
+ END_DOC
+ integer              :: idet, k
+ integer, allocatable :: kept(:)
+ allocate(kept(0:N_det))
+ do idet = 1, N_det
+  call filter_connected_array(psi_det_sorted,psi_det_sorted(1,1,idet),psi_det_size,N_int,N_det,kept)
+  do k = 1, kept(0)
+   list_connected_det_per_det(k,idet) = kept(k)
+   connected_det_per_det(1:N_int,1:2,k,idet) = psi_det_sorted(1:N_int,1:2,kept(k))
+  enddo
+ enddo
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, sparse_h_mat, (n_max_connected_per_det, N_det)]
+ implicit none
+ BEGIN_DOC
+! sparse matrix format 
+! only the entries j <= n_connected_per_det(i) are set
+! sparse_h_mat(j,i) = matrix element (from i_H_j) between psi_det_sorted(:,:,i) and connected_det_per_det(:,:,j,i)
+ END_DOC
+ integer :: i,j 
+ double precision :: hij
+ do i = 1, N_det
+  do j = 1, n_connected_per_det(i)
+   ! bra taken from psi_det_sorted: the connected list of i was built from psi_det_sorted(:,:,i)
+   call i_H_j(psi_det_sorted(1,1,i),connected_det_per_det(1,1,j,i),N_int,hij)
+   sparse_h_mat(j,i) = hij
+  enddo
+ enddo
+END_PROVIDER 
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
index a99c7698..2f33cd17 100644
--- a/src/tc_scf/fock_tc_mo_tot.irp.f
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -73,6 +73,29 @@
              + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
        enddo
      enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
 
    endif
 
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index 35b6aac6..3901f707 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -128,6 +128,8 @@ BEGIN_PROVIDER [double precision, diag_three_elem_hf]
       call give_abb_contrib(integral_abb)
       call give_bbb_contrib(integral_bbb)
       diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
 
     endif
 
diff --git a/src/tools/print_hmat.irp.f b/src/tools/print_hmat.irp.f
new file mode 100644
index 00000000..48001e44
--- /dev/null
+++ b/src/tools/print_hmat.irp.f
@@ -0,0 +1,90 @@
+program print_h_mat
+  implicit none
+  BEGIN_DOC
+  ! Program that writes the wave function (determinants and coefficients) and the CI matrix (sparse and full form) to text files, then cross-checks the sparse matrix
+  END_DOC
+  read_wf = .True. ! set before any routine below runs; presumably requests the stored wave function - confirm against the read_wf provider
+  touch read_wf ! IRPF90 TOUCH: signal that read_wf was modified by hand so entities depending on it are rebuilt
+  call print_wf_dets ! writes <ezfio_filename>.wf_det
+  call print_wf_coef ! writes <ezfio_filename>.wf_coef
+  call sparse_mat ! writes <ezfio_filename>.hmat_sparse
+  call full_mat ! writes <ezfio_filename>.hmat_full
+  call test_sparse_mat ! prints eigval(1) of the rebuilt matrix next to CI_electronic_energy(1)
+end
+
+subroutine print_wf_dets ! Writes the sorted determinants of the wave function to the text file <ezfio_filename>.wf_det
+ implicit none
+ integer :: i,j ! NOTE(review): j is declared but never used in this routine
+ character*(128) :: output ! output file name, fixed 128-character buffer
+ integer :: i_unit_output,getUnitAndOpen ! getUnitAndOpen: external integer function whose result is used as the I/O unit
+ output=trim(ezfio_filename)//'.wf_det'
+ i_unit_output = getUnitAndOpen(output,'w') ! presumably opens the file for writing ('w') - confirm against getUnitAndOpen
+ write(i_unit_output,*)N_det,N_int ! header record: N_det and N_int (each record below holds N_int values)
+ do i = 1, N_det
+  write(i_unit_output,*)psi_det_sorted(1:N_int,1,i) ! component 1 of determinant i (presumably the alpha string - confirm)
+  write(i_unit_output,*)psi_det_sorted(1:N_int,2,i) ! component 2 of determinant i (presumably the beta string - confirm)
+ enddo
+end ! NOTE(review): i_unit_output is not closed before returning
+
+subroutine print_wf_coef ! Writes the sorted wave function coefficients to the text file <ezfio_filename>.wf_coef
+ implicit none
+ integer :: i,j ! NOTE(review): j is declared but never used in this routine
+ character*(128) :: output ! output file name, fixed 128-character buffer
+ integer :: i_unit_output,getUnitAndOpen ! getUnitAndOpen: external integer function whose result is used as the I/O unit
+ output=trim(ezfio_filename)//'.wf_coef'
+ i_unit_output = getUnitAndOpen(output,'w') ! presumably opens the file for writing ('w') - confirm against getUnitAndOpen
+ write(i_unit_output,*)N_det,N_states ! header record: N_det and N_states (each record below holds N_states values)
+ do i = 1, N_det
+  write(i_unit_output,*)psi_coef_sorted(i,1:N_states) ! one record per determinant i: its coefficient in every state
+ enddo
+end ! NOTE(review): i_unit_output is not closed before returning
+
+subroutine sparse_mat ! Writes the sparse H matrix, one section per determinant, to the text file <ezfio_filename>.hmat_sparse
+ implicit none
+ integer :: i,j ! i: determinant (section) index, j: position inside the connection list of i
+ character*(128) :: output ! output file name, fixed 128-character buffer
+ integer :: i_unit_output,getUnitAndOpen ! getUnitAndOpen: external integer function whose result is used as the I/O unit
+ output=trim(ezfio_filename)//'.hmat_sparse'
+ i_unit_output = getUnitAndOpen(output,'w') ! presumably opens the file for writing ('w') - confirm against getUnitAndOpen
+ do i = 1, N_det
+  write(i_unit_output,*)i,n_connected_per_det(i) ! section header: index i and the number of entries that follow
+  do j =1, n_connected_per_det(i)
+   write(i_unit_output,*)list_connected_det_per_det(j,i),sparse_h_mat(j,i) ! entry: connected determinant index and the stored matrix element
+  enddo
+ enddo
+end ! NOTE(review): i_unit_output is not closed before returning
+
+
+subroutine full_mat ! Writes H_matrix_all_dets as (i, j, value) records to the text file <ezfio_filename>.hmat_full
+ implicit none
+ integer :: i,j ! indices of the matrix element being written
+ character*(128) :: output ! output file name, fixed 128-character buffer
+ integer :: i_unit_output,getUnitAndOpen ! getUnitAndOpen: external integer function whose result is used as the I/O unit
+ output=trim(ezfio_filename)//'.hmat_full'
+ i_unit_output = getUnitAndOpen(output,'w') ! presumably opens the file for writing ('w') - confirm against getUnitAndOpen
+ do i = 1, N_det
+  do j = i, N_det ! only the triangle j >= i is written
+   write(i_unit_output,*)i,j,H_matrix_all_dets(j,i) ! record: i, j, then element (j,i)
+  enddo
+ enddo
+end ! NOTE(review): i_unit_output is not closed before returning
+
+
+subroutine test_sparse_mat ! Rebuilds a dense matrix from the sparse H storage, diagonalizes it and prints eigval(1) next to CI_electronic_energy(1)
+ implicit none
+ integer :: i,j ! i: determinant (column) index, j: position inside the connection list of i
+ double precision, allocatable :: eigvec(:,:), eigval(:), hmat(:,:) ! dense work arrays, N_det x N_det for eigvec and hmat
+ allocate(eigval(N_det), eigvec(N_det,N_det),hmat(N_det,N_det))
+ hmat = 0.d0 ! entries absent from the sparse lists stay zero
+ do i = 1, N_det
+  do j =1, n_connected_per_det(i)
+   hmat(list_connected_det_per_det(j,i),i) = sparse_h_mat(j,i) ! scatter stored entry j of determinant i into column i
+  enddo
+ enddo
+ call lapack_diag(eigval,eigvec,hmat,N_det,N_det) ! presumably a symmetric eigen-solver returning ascending eigenvalues - confirm against lapack_diag
+ print*,'The two energies should be the same '
+ print*,'eigval(1) = ',eigval(1)
+ print*,'psi_energy= ',CI_electronic_energy(1)
+
+
+end ! the local allocatable arrays are not deallocated explicitly; Fortran releases them on return

From 8eecb342c95c2175963195239af435e5130e8c49 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Wed, 21 Dec 2022 14:10:39 +0100
Subject: [PATCH 18/68] fixed bug in AO Fock UHF

---
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f | 172 ++++++++++++++++++++------
 src/tc_scf/test_int.irp.f             |   8 +-
 2 files changed, 139 insertions(+), 41 deletions(-)

diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
index 607140f9..37fb3cba 100644
--- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -239,11 +239,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
   END_DOC
 
   implicit none
-  integer          :: g, d, e, k, mu, nu
-  double precision :: dm_ge_a, dm_ge_b, dm_ge
-  double precision :: dm_dk_a, dm_dk_b, dm_dk
-  double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
-  double precision :: ti, tf
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
 
   print *, ' PROVIDING fock_3e_uhf_ao_a ...'
   call wall_time(ti)
@@ -251,48 +252,98 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
   fock_3e_uhf_ao_a = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                                &
- !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
  !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
  !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
  !$OMP DO
   do g = 1, ao_num
     do e = 1, ao_num
       dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
       dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
       dm_ge   = dm_ge_a + dm_ge_b
-
       do d = 1, ao_num
         do k = 1, ao_num
           dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
           dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
           dm_dk   = dm_dk_a + dm_dk_b
-
           do mu = 1, ao_num
             do nu = 1, ao_num
-
               call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
-
-              fock_3e_uhf_ao_a(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
-                                                 + dm_ge_a * dm_dk_a * i_mugd_eknu &
-                                                 + dm_ge_a * dm_dk_a * i_mugd_knue &
-                                                 - dm_ge   * dm_dk_a * i_mugd_kenu &
-                                                 - dm_ge_a * dm_dk   * i_mugd_enuk &
-                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke &
-                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke )
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
             enddo
           enddo
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
  !$OMP END PARALLEL
 
+! TODO
+! !$OMP PARALLEL DEFAULT (NONE)                                                                &
+! !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+! !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+! !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+! !$OMP DO
+!  do g = 1, ao_num
+!    do e = 1, ao_num
+!      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+!      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+!      dm_ge   = dm_ge_a + dm_ge_b
+!      do d = 1, ao_num
+!        do k = 1, ao_num
+!          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+!          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+!          dm_dk   = dm_dk_a + dm_dk_b
+!          do mu = 1, ao_num
+!            do nu = 1, ao_num
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+!              fock_3e_uhf_ao_a(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+!                                                 + dm_ge_a * dm_dk_a * i_mugd_eknu &
+!                                                 + dm_ge_a * dm_dk_a * i_mugd_knue &
+!                                                 - dm_ge_a * dm_dk   * i_mugd_enuk &
+!                                                 - dm_ge   * dm_dk_a * i_mugd_kenu &
+!                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke &
+!                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke )
+!            enddo
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END DO
+! !$OMP END PARALLEL
+
   call wall_time(tf)
   print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
 
@@ -314,11 +365,12 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
   END_DOC
 
   implicit none
-  integer          :: g, d, e, k, mu, nu
-  double precision :: dm_ge_a, dm_ge_b, dm_ge
-  double precision :: dm_dk_a, dm_dk_b, dm_dk
-  double precision :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
-  double precision :: ti, tf
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
 
   print *, ' PROVIDING fock_3e_uhf_ao_b ...'
   call wall_time(ti)
@@ -326,48 +378,96 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
   fock_3e_uhf_ao_b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                                &
- !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
  !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
  !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
  !$OMP DO
   do g = 1, ao_num
     do e = 1, ao_num
       dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
       dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
       dm_ge   = dm_ge_a + dm_ge_b
-
       do d = 1, ao_num
         do k = 1, ao_num
           dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
           dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
           dm_dk   = dm_dk_a + dm_dk_b
-
           do mu = 1, ao_num
             do nu = 1, ao_num
-
               call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
               call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
-
-              fock_3e_uhf_ao_b(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
-                                                 + dm_ge_b * dm_dk_b * i_mugd_eknu &
-                                                 + dm_ge_b * dm_dk_b * i_mugd_knue &
-                                                 - dm_ge   * dm_dk_b * i_mugd_kenu &
-                                                 - dm_ge_b * dm_dk   * i_mugd_enuk &
-                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke &
-                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke )
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
             enddo
           enddo
         enddo
       enddo
     enddo
   enddo
- !$OMP END DO
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
  !$OMP END PARALLEL
 
+! TODO
+! !$OMP PARALLEL DO DEFAULT (NONE)                                                             &
+! !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk,         &
+! !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+! !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+!  do g = 1, ao_num
+!    do e = 1, ao_num
+!      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+!      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+!      dm_ge   = dm_ge_a + dm_ge_b
+!      do d = 1, ao_num
+!        do k = 1, ao_num
+!          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+!          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+!          dm_dk   = dm_dk_a + dm_dk_b
+!          do mu = 1, ao_num
+!            do nu = 1, ao_num
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+!              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+!              fock_3e_uhf_ao_b(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+!                                                 + dm_ge_b * dm_dk_b * i_mugd_eknu &
+!                                                 + dm_ge_b * dm_dk_b * i_mugd_knue &
+!                                                 - dm_ge_b * dm_dk   * i_mugd_enuk &
+!                                                 - dm_ge   * dm_dk_b * i_mugd_kenu &
+!                                                 - dm_ge_b * dm_dk_b * i_mugd_nuke &
+!                                                 - dm_ge_a * dm_dk_a * i_mugd_nuke )
+!            enddo
+!          enddo
+!        enddo
+!      enddo
+!    enddo
+!  enddo
+! !$OMP END PARALLEL DO
+
   call wall_time(tf)
   print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index e12e629b..c9b0d108 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -25,7 +25,7 @@ program test_ints
  !call routine_int2_grad1u2_grad2u2_j1b2
 
 
-  !call test_fock_3e_uhf_ao()
+  call test_fock_3e_uhf_ao()
   call test_fock_3e_uhf_mo()
 
 end
@@ -364,11 +364,10 @@ subroutine test_fock_3e_uhf_ao()
   thr_ih = 1d-7
 
   PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
 
   ! ---
 
-  PROVIDE fock_3e_uhf_ao_a 
-
   allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
   call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
                         , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
@@ -397,8 +396,6 @@ subroutine test_fock_3e_uhf_ao()
 
   ! ---
 
-  PROVIDE fock_3e_uhf_ao_b
-
   allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
   call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
                         , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
@@ -421,6 +418,7 @@ subroutine test_fock_3e_uhf_ao()
     enddo
   enddo
   print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
 
   deallocate(fock_3e_uhf_ao_b_mo)
 

From 3560133e9c2acdebed40bd3ff25ecd2b94ccd5f6 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 22 Dec 2022 00:46:31 +0100
Subject: [PATCH 19/68] added explicit PROVIDE in F(3e)

---
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f        |  6 ++++++
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f | 22 +++++++++++++++++++-
 src/tc_scf/tc_scf.irp.f                      | 14 ++++++-------
 src/tc_scf/test_int.irp.f                    |  2 +-
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
index 37fb3cba..048255f6 100644
--- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -8,6 +8,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
   double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
   double precision :: ti, tf
 
+  PROVIDE mo_l_coef mo_r_coef
+
   !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
   call wall_time(ti)
 
@@ -52,6 +54,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
   double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
   double precision :: ti, tf
 
+  PROVIDE mo_l_coef mo_r_coef
+
   !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
   call wall_time(ti)
 
@@ -147,6 +151,8 @@ BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
   double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
   double precision :: ti, tf
 
+  PROVIDE mo_l_coef mo_r_coef
+
   !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
   call wall_time(ti)
 
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
index 859c06ca..f73171a3 100644
--- a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -6,6 +6,8 @@ BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
   implicit none
   integer :: i, a
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_a_tot_3e_bi_orth = 0.d0
 
   do i = 1, mo_num
@@ -25,6 +27,8 @@ BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
   implicit none
   integer :: i, a
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_b_tot_3e_bi_orth = 0.d0
 
   do i = 1, mo_num
@@ -47,6 +51,8 @@ BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
   double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
   double precision :: new
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_cs_3e_bi_orth = 0.d0
 
   do i = 1, mo_num
@@ -91,6 +97,8 @@ BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
   double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
   double precision :: new
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_a_tmp1_bi_ortho = 0.d0
 
   do i = 1, mo_num
@@ -122,6 +130,8 @@ BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
   implicit none
   integer          :: i, a, j, k
   double precision :: contrib_sss
+
+  PROVIDE mo_l_coef mo_r_coef
  
   fock_a_tmp2_bi_ortho = 0.d0
 
@@ -148,6 +158,8 @@ BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
   double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
   double precision :: new
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_b_tmp1_bi_ortho = 0.d0
 
   do i = 1, mo_num
@@ -176,6 +188,8 @@ BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
   integer          :: i, a, j, k
   double precision :: contrib_soo
 
+  PROVIDE mo_l_coef mo_r_coef
+
   fock_b_tmp2_bi_ortho = 0.d0
 
   do i = 1, mo_num
@@ -199,12 +213,14 @@ subroutine contrib_3e_sss(a, i, j, k, integral)
   BEGIN_DOC
   ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
   END_DOC
- 
+
   implicit none
   integer,          intent(in)  :: a, i, j, k
   double precision, intent(out) :: integral
   double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
 
+  PROVIDE mo_l_coef mo_r_coef
+
   call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
   call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
   call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
@@ -233,6 +249,8 @@ subroutine contrib_3e_soo(a,i,j,k,integral)
   double precision, intent(out) :: integral
   double precision              :: direct_int, exch_23_int
 
+  PROVIDE mo_l_coef mo_r_coef
+
   call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
   call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
   integral = direct_int - exch_23_int 
@@ -249,6 +267,8 @@ subroutine contrib_3e_sos(a, i, j, k, integral)
   ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
   END_DOC
 
+  PROVIDE mo_l_coef mo_r_coef
+
   implicit none
   integer,          intent(in)  :: a, i, j, k
   double precision, intent(out) :: integral
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 5090cc97..fd11c48e 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -170,7 +170,7 @@ subroutine simple_tcscf()
       TOUCH mo_l_coef mo_r_coef
       call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
 
-      !call test_fock_3e_uhf_mo()
+      call test_fock_3e_uhf_mo()
 
       print *, ' ***'
       print *, ''
@@ -226,9 +226,9 @@ subroutine test_fock_3e_uhf_mo()
 
       diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
       if(diff_ij .gt. thr_ih) then
-        print *, ' difference on ', j, i
-        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
-        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        !print *, ' difference on ', j, i
+        !print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        !print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
         !stop
       endif
 
@@ -249,9 +249,9 @@ subroutine test_fock_3e_uhf_mo()
 
       diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
       if(diff_ij .gt. thr_ih) then
-        print *, ' difference on ', j, i
-        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
-        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        !print *, ' difference on ', j, i
+        !print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        !print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
         !stop
       endif
 
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
index c9b0d108..b5821cb3 100644
--- a/src/tc_scf/test_int.irp.f
+++ b/src/tc_scf/test_int.irp.f
@@ -25,7 +25,7 @@ program test_ints
  !call routine_int2_grad1u2_grad2u2_j1b2
 
 
-  call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_ao()
   call test_fock_3e_uhf_mo()
 
 end

From eee1fe86733bb34628d5243f3fd7c9f2e98ca76a Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 22 Dec 2022 14:42:38 +0100
Subject: [PATCH 20/68] prep for PR

---
 src/bi_ort_ints/semi_num_ints_mo.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 89098676..a548ee62 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -178,8 +178,8 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3,
   integer :: i, j, ipoint
 
   do ipoint = 1, n_points_final_grid
-    do i = 1, mo_num
-      do j = 1, mo_num
+    do i = 1, ao_num
+      do j = 1, ao_num
         int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(1,j,i,ipoint)
         int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(2,j,i,ipoint)
         int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(3,j,i,ipoint)

From 26840614021ce1b48b6d28b40b717fb232599074 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 22 Dec 2022 14:56:27 +0100
Subject: [PATCH 21/68] merged with MANU

---
 src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cb25ee7c..54c2d95b 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -127,7 +127,6 @@ double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center
     enddo
   enddo
   overlap_abs_gauss_r12= accu
->>>>>>> f7e58e4a636af0ab066aa644a74ab56cb4de6041
 end
 
 !---

From 43e9b8c6872e7fb2f77ec2609fb62d9057783f13 Mon Sep 17 00:00:00 2001
From: AbdAmmar <abd.ammar.phys@gmail.com>
Date: Thu, 22 Dec 2022 17:53:31 +0100
Subject: [PATCH 22/68] modif in save for TC qmcchem

---
 .../save_bitcpsileft_for_qmcchem.irp.f        | 35 +++++++++++++------
 1 file changed, 25 insertions(+), 10 deletions(-)

diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
index 60201f5f..eb812401 100644
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
@@ -14,21 +14,36 @@ program save_bitcpsileft_for_qmcchem
 
   e_ref = 0.d0
   iunit = 13
-  open(unit=iunit,file=trim(ezfio_filename)//'/simulation/e_ref',action='write')
-  call ezfio_has_fci_energy_pt2(exists)
-
-  if(.not.exists) then
-    call ezfio_has_fci_energy(exists)
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
 
+    call ezfio_has_fci_energy_pt2(exists)
     if(.not.exists) then
-      call ezfio_has_tc_scf_bitc_energy(exists)
-      if(exists) then
-        call ezfio_get_tc_scf_bitc_energy(e_ref)
+
+      call ezfio_has_fci_energy(exists)
+      if(.not.exists) then
+
+        call ezfio_has_cisd_energy(exists)
+        if(.not.exists) then
+
+          call ezfio_has_tc_scf_bitc_energy(exists)
+          if(exists) then
+            call ezfio_get_tc_scf_bitc_energy(e_ref)
+          endif
+
+        else
+          call ezfio_get_cisd_energy(e_ref)
+        endif
+
+      else
+        call ezfio_get_fci_energy(e_ref)
       endif
+
+      else
+        call ezfio_get_fci_energy_pt2(e_ref)
     endif
 
-  endif
-  write(iunit,*) e_ref
+    write(iunit,*) e_ref
+
   close(iunit)
 
 end

From 547e6e88236f6fc7e1efd96ed99b18eced50ab09 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Mon, 2 Jan 2023 09:28:42 +0100
Subject: [PATCH 23/68] fix csf

---
 src/csf/configuration_CI_sigma_helpers.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/csf/configuration_CI_sigma_helpers.irp.f b/src/csf/configuration_CI_sigma_helpers.irp.f
index 76761d53..e6e6a0d7 100644
--- a/src/csf/configuration_CI_sigma_helpers.irp.f
+++ b/src/csf/configuration_CI_sigma_helpers.irp.f
@@ -1155,7 +1155,7 @@ END_PROVIDER
         !end do
 
         if(.NOT. pqExistsQ) then
-           tableUniqueAlphas(p,q) = .TRUE.
+           tableUniqueAlphas(pp,qq) = .TRUE.
            !print *,p,q
            !call debug_spindet(Jsomo,1)
            !call debug_spindet(Jdomo,1)

From 1a12e7f30882684cdc8dddcf96df05449ee54613 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 10:51:12 +0100
Subject: [PATCH 24/68] Fixed AO normalization problem

---
 src/ao_basis/EZFIO.cfg              |  6 +++---
 src/ao_basis/aos.irp.f              | 15 ++++++++-------
 src/ao_one_e_ints/pot_ao_ints.irp.f |  4 ++--
 src/ao_one_e_ints/pseudopot.f90     | 28 ++++++++++++++--------------
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/src/ao_basis/EZFIO.cfg b/src/ao_basis/EZFIO.cfg
index dd61b1be..3ac16446 100644
--- a/src/ao_basis/EZFIO.cfg
+++ b/src/ao_basis/EZFIO.cfg
@@ -57,13 +57,13 @@ default: false
 
 [ao_normalized]
 type: logical
-doc: Use normalized basis functions
+doc: Normalize the atomic orbitals
 interface: ezfio, provider
-default: true
+default: false
 
 [primitives_normalized]
 type: logical
-doc: Use normalized primitive functions
+doc: Normalize the primitive basis functions
 interface: ezfio, provider
 default: true
 
diff --git a/src/ao_basis/aos.irp.f b/src/ao_basis/aos.irp.f
index 3a9e9fb7..dafea9c4 100644
--- a/src/ao_basis/aos.irp.f
+++ b/src/ao_basis/aos.irp.f
@@ -63,15 +63,14 @@ END_PROVIDER
   ! Coefficients including the |AO| normalization
   END_DOC
 
-  do i=1,ao_num
-    l = ao_shell(i)
-    ao_coef_normalized(i,:) = shell_coef(l,:) * shell_normalization_factor(l)
-  end do
 
   double precision               :: norm,overlap_x,overlap_y,overlap_z,C_A(3), c
-  integer                        :: l, powA(3), nz
+  integer                        :: l, powA(3)
+  integer, parameter             :: nz=100
   integer                        :: i,j,k
-  nz=100
+
+   ao_coef_normalized(:,:) = ao_coef(:,:)
+
   C_A = 0.d0
 
   do i=1,ao_num
@@ -80,7 +79,7 @@ END_PROVIDER
     powA(2) = ao_power(i,2)
     powA(3) = ao_power(i,3)
 
-    ! Normalization of the primitives
+    ! GAMESS-type normalization of the primitives
     if (primitives_normalized) then
       do j=1,ao_prim_num(i)
         call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,j), &
@@ -91,6 +90,7 @@ END_PROVIDER
     ! Normalization of the contracted basis functions
     if (ao_normalized) then
       norm = 0.d0
+      l = ao_shell(i)
       do j=1,ao_prim_num(i)
         do k=1,ao_prim_num(i)
           call overlap_gaussian_xyz(C_A,C_A,ao_expo(i,j),ao_expo(i,k),powA,powA,overlap_x,overlap_y,overlap_z,c,nz)
@@ -98,6 +98,7 @@ END_PROVIDER
         enddo
       enddo
       ao_coef_normalization_factor(i) = 1.d0/dsqrt(norm)
+      ao_coef_normalized(i,:) *= ao_coef_normalization_factor(i)
     else
       ao_coef_normalization_factor(i) = 1.d0
     endif
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index dc19f6c7..928053ad 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -18,6 +18,8 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
   double precision :: A_center(3),B_center(3),C_center(3)
   double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
 
+  ao_integrals_n_e = 0.d0
+
   if (read_ao_integrals_n_e) then
 
     call ezfio_get_ao_one_e_ints_ao_integrals_n_e(ao_integrals_n_e)
@@ -36,8 +38,6 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
 
     else
 
-      ao_integrals_n_e = 0.d0
-
       !$OMP PARALLEL                                                   &
           !$OMP DEFAULT (NONE)                                         &
           !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
diff --git a/src/ao_one_e_ints/pseudopot.f90 b/src/ao_one_e_ints/pseudopot.f90
index 7321dff7..e02dea3b 100644
--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@@ -1950,26 +1950,26 @@ xq(17)=-3.34785456738322
 xq(18)=-3.94476404011563
 xq(19)=-4.60368244955074
 xq(20)=-5.38748089001123
-wq(1)=  2.229393645534151E-013
-wq(2)=  4.399340992273176E-010
-wq(3)=  1.086069370769280E-007
-wq(4)=  7.802556478532063E-006
-wq(5)=  2.283386360163528E-004
-wq(6)=  3.243773342237853E-003
-wq(7)=  2.481052088746362E-002
+wq(1)=  2.229393645534151D-013
+wq(2)=  4.399340992273176D-010
+wq(3)=  1.086069370769280D-007
+wq(4)=  7.802556478532063D-006
+wq(5)=  2.283386360163528D-004
+wq(6)=  3.243773342237853D-003
+wq(7)=  2.481052088746362D-002
 wq(8)=  0.109017206020022
 wq(9)=  0.286675505362834
 wq(10)= 0.462243669600610
 wq(11)= 0.462243669600610
 wq(12)= 0.286675505362834
 wq(13)= 0.109017206020022
-wq(14)= 2.481052088746362E-002
-wq(15)= 3.243773342237853E-003
-wq(16)= 2.283386360163528E-004
-wq(17)= 7.802556478532063E-006
-wq(18)= 1.086069370769280E-007
-wq(19)= 4.399340992273176E-010
-wq(20)= 2.229393645534151E-013
+wq(14)= 2.481052088746362D-002
+wq(15)= 3.243773342237853D-003
+wq(16)= 2.283386360163528D-004
+wq(17)= 7.802556478532063D-006
+wq(18)= 1.086069370769280D-007
+wq(19)= 4.399340992273176D-010
+wq(20)= 2.229393645534151D-013
 
       npts=20
 !      call gauher(xq,wq,npts)

From b4f233ae8571f6dde9d3135602126ce5f27e6699 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 11:00:07 +0100
Subject: [PATCH 25/68] Fixed qp_convert

---
 bin/qp_convert_output_to_ezfio | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/qp_convert_output_to_ezfio b/bin/qp_convert_output_to_ezfio
index e7c44b37..e53a9392 100755
--- a/bin/qp_convert_output_to_ezfio
+++ b/bin/qp_convert_output_to_ezfio
@@ -162,11 +162,11 @@ def write_ezfio(res, filename):
     # P a r s i n g #
     # ~#~#~#~#~#~#~ #
 
-    prim_num_max = ezfio.get_ao_basis_ao_prim_num_max()
+    prim_num_max = max(ezfio.get_ao_basis_ao_prim_num())
+    ezfio.set_ao_basis_ao_prim_num_max(prim_num_max)
 
     for i in range(len(res.basis)):
-        coefficient[
-            i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
+        coefficient[i] += [0. for j in range(len(coefficient[i]), prim_num_max)]
         exponent[i] += [0. for j in range(len(exponent[i]), prim_num_max)]
 
     coefficient = reduce(lambda x, y: x + y, coefficient, [])

From 5911d134ef563162e8afbda454976a87b2d5437f Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 12:07:41 +0100
Subject: [PATCH 26/68] Cleaning

---
 config/ifort_2019_debug.cfg             | 66 +++++++++++++++++++++++++
 src/mo_two_e_ints/mo_bi_integrals.irp.f | 20 +++-----
 2 files changed, 73 insertions(+), 13 deletions(-)
 create mode 100644 config/ifort_2019_debug.cfg

diff --git a/config/ifort_2019_debug.cfg b/config/ifort_2019_debug.cfg
new file mode 100644
index 00000000..cb14f467
--- /dev/null
+++ b/config/ifort_2019_debug.cfg
@@ -0,0 +1,66 @@
+# Common flags
+##############
+#
+# -mkl=[parallel|sequential] : Use the MKL library
+# --ninja                 : Allow the utilisation of ninja. It is mandatory !
+# --align=32                 : Align all provided arrays on a 32-byte boundary
+#
+[COMMON]
+FC           : ifort -fpic
+LAPACK_LIB   : -mkl=parallel -lirc -lsvml -limf -lipps
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DINTEL -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+# 
+[OPTION]
+MODE    : DEBUG      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -xHost                     : Compile a binary optimized for the current architecture
+# -O2                        : O3 not better than O2.
+# -ip                        : Inter-procedural optimizations
+# -ftz                       : Flushes denormal results to zero
+#
+[OPT]
+FC       : -traceback
+FCFLAGS  : -msse4.2 -O2 -ip -ftz -g 
+
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC       : -p -g
+FCFLAGS  : -msse4.2 -O2 -ip -ftz 
+
+
+# Debugging flags
+#################
+#
+# -traceback   : Activate backtrace on runtime
+# -fpe0        : All floating point exceptions
+# -C           : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+# -msse4.2     : Valgrind needs a very simple x86 executable
+#
+[DEBUG]
+FC      : -g -traceback
+FCFLAGS : -msse4.2 -check all -debug all -fpe-all=0  -implicitnone
+
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -qopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index 56d8cf28..ae728943 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -38,7 +38,7 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ]
     print*, 'MO integrals provided'
     return
   else
-    PROVIDE ao_two_e_integrals_in_map 
+    PROVIDE ao_two_e_integrals_in_map
   endif
 
   print *,  ''
@@ -245,8 +245,6 @@ subroutine add_integrals_to_map(mask_ijkl)
     return
   endif
 
-  double precision               :: accu_bis
-  accu_bis = 0.d0
   call wall_time(wall_1)
 
   size_buffer = min( (qp_max_mem/(nproc*5)),mo_num*mo_num*mo_num)
@@ -256,7 +254,7 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax,   &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l,   &
       !$OMP  mo_coef_transp,                                         &
@@ -434,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
-  call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
-      real(mo_integrals_threshold,integral_kind))
+  if (n_integrals > 0) then
+    call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
+        real(mo_integrals_threshold,integral_kind))
+  endif
   deallocate(buffer_i, buffer_value)
   !$OMP END PARALLEL
   call map_merge(mo_integrals_map)
@@ -527,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
 
   call wall_time(wall_1)
   call cpu_time(cpu_1)
-  double precision               :: accu_bis
-  accu_bis = 0.d0
   !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, &
       !$OMP  two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,&
       !$OMP  buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0,   &
-      !$OMP  wall_0,thread_num,accu_bis)                             &
+      !$OMP  wall_0,thread_num)                             &
       !$OMP  DEFAULT(NONE)                                           &
       !$OMP  SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,       &
       !$OMP  mo_coef_transp,                                         &
@@ -730,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk)
   !$OMP END DO NOWAIT
   deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3)
 
-  integer                        :: index_needed
-
   call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,&
       real(mo_integrals_threshold,integral_kind))
   deallocate(buffer_i, buffer_value)

From a3dd01914482eab568b56f61ef0e05c75c8c94b2 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Mon, 9 Jan 2023 12:25:03 +0100
Subject: [PATCH 27/68] Fixed problem of buffer size in MO transformation

---
 src/mo_two_e_ints/mo_bi_integrals.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f
index ae728943..411d2d4e 100644
--- a/src/mo_two_e_ints/mo_bi_integrals.irp.f
+++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f
@@ -247,7 +247,7 @@ subroutine add_integrals_to_map(mask_ijkl)
 
   call wall_time(wall_1)
 
-  size_buffer = min( (qp_max_mem/(nproc*5)),mo_num*mo_num*mo_num)
+  size_buffer = min(mo_num*mo_num*mo_num,8000000)
   print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+&
       ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core'
 

From 92a4e33f8a21717cab0c0e4f8412ed6903afb04a Mon Sep 17 00:00:00 2001
From: Abdallah Ammar <abd.ammar.phys@gmail.com>
Date: Mon, 16 Jan 2023 23:36:05 +0100
Subject: [PATCH 28/68]  clean fork with + TC stuffs

---
 src/ao_many_one_e_ints/ao_gaus_gauss.irp.f    |   67 +-
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   |  518 +++++++++
 src/ao_many_one_e_ints/grad2_jmu_modif.irp.f  |   49 +-
 .../grad2_jmu_modif_vect.irp.f                |   22 +-
 .../grad_lapl_jmu_manu.irp.f                  |  369 ++++++
 .../grad_lapl_jmu_modif.irp.f                 |   62 +-
 .../grad_related_ints.irp.f                   |   46 +-
 src/ao_many_one_e_ints/list_grid.irp.f        |   59 +
 src/ao_many_one_e_ints/listj1b.irp.f          |   15 +-
 src/ao_many_one_e_ints/listj1b_sorted.irp.f   |  191 ++++
 .../prim_int_gauss_gauss.irp.f                |   99 +-
 src/ao_tc_eff_map/fit_j.irp.f                 |  125 ++
 src/ao_tc_eff_map/potential.irp.f             |  165 ++-
 src/bi_ort_ints/semi_num_ints_mo.irp.f        |  112 +-
 src/bi_ort_ints/three_body_ints_bi_ort.irp.f  |   43 +-
 src/bi_ortho_mos/bi_density.irp.f             |   57 +-
 src/bi_ortho_mos/mos_rl.irp.f                 |   47 +
 src/dft_utils_in_r/ao_in_r.irp.f              |   41 +
 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f      |  155 +++
 src/hartree_fock/fock_matrix_hf.irp.f         |   25 +-
 src/hartree_fock/scf.irp.f                    |   19 +-
 src/non_h_ints_mu/debug_integ_jmu_modif.irp.f |  195 +++-
 src/non_h_ints_mu/grad_squared.irp.f          |  122 +-
 src/non_h_ints_mu/grad_squared_manu.irp.f     |  221 ++++
 src/non_h_ints_mu/j12_nucl_utils.irp.f        |   17 +
 src/non_h_ints_mu/new_grad_tc.irp.f           |  284 ++++-
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      |  174 +++
 src/non_h_ints_mu/total_tc_int.irp.f          |   51 +-
 src/non_hermit_dav/biorthog.irp.f             |   62 +-
 .../lapack_diag_non_hermit.irp.f              |  174 +--
 src/non_hermit_dav/new_routines.irp.f         |   53 +-
 src/scf_utils/diagonalize_fock.irp.f          |   11 +-
 src/scf_utils/diis.irp.f                      |  179 +++
 src/scf_utils/fock_matrix.irp.f               |    2 +
 src/scf_utils/rh_scf_simple.irp.f             |  129 +++
 src/scf_utils/roothaan_hall_scf.irp.f         |   33 +-
 .../save_bitcpsileft_for_qmcchem.irp.f        |   35 +-
 src/tc_bi_ortho/tc_som.irp.f                  |   70 ++
 src/tc_bi_ortho/test_tc_fock.irp.f            |   84 +-
 src/tc_keywords/EZFIO.cfg                     |   52 +-
 src/tc_scf/diago_bi_ort_tcfock.irp.f          |  105 +-
 src/tc_scf/diis_tcscf.irp.f                   |  186 +++
 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f         |  405 +++++++
 src/tc_scf/fock_tc.irp.f                      |  295 +++--
 src/tc_scf/fock_tc_mo_tot.irp.f               |   23 +
 src/tc_scf/fock_three.irp.f                   |  106 +-
 src/tc_scf/fock_three_bi_ortho_new_new.irp.f  |  400 ++++---
 src/tc_scf/rh_tcscf_diis.irp.f                |  362 ++++++
 src/tc_scf/rh_tcscf_simple.irp.f              |  129 +++
 src/tc_scf/rotate_tcscf_orbitals.irp.f        |    8 +-
 src/tc_scf/routines_rotates.irp.f             |   12 +-
 src/tc_scf/tc_scf.irp.f                       |  166 +--
 src/tc_scf/tc_scf_dm.irp.f                    |   42 +-
 src/tc_scf/tc_scf_energy.irp.f                |   14 +-
 src/tc_scf/tc_scf_utils.irp.f                 |    1 +
 src/tc_scf/test_int.irp.f                     | 1008 +++++++++++++++++
 src/tools/print_he_energy.irp.f               |    4 +-
 57 files changed, 6528 insertions(+), 972 deletions(-)
 create mode 100644 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
 create mode 100644 src/ao_many_one_e_ints/list_grid.irp.f
 create mode 100644 src/ao_many_one_e_ints/listj1b_sorted.irp.f
 create mode 100644 src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
 create mode 100644 src/non_h_ints_mu/grad_squared_manu.irp.f
 create mode 100644 src/non_h_ints_mu/new_grad_tc_manu.irp.f
 create mode 100644 src/scf_utils/rh_scf_simple.irp.f
 create mode 100644 src/tc_bi_ortho/tc_som.irp.f
 create mode 100644 src/tc_scf/diis_tcscf.irp.f
 create mode 100644 src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_diis.irp.f
 create mode 100644 src/tc_scf/rh_tcscf_simple.irp.f
 create mode 100644 src/tc_scf/test_int.irp.f

diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index 213a63e4..d2115d9e 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -156,6 +156,53 @@ end function overlap_gauss_r12_ao
 
 ! --
 
+double precision function overlap_abs_gauss_r12_ao(D_center, delta, i, j)
+
+  BEGIN_DOC
+  ! Absolute-value counterpart of \int dr AO_i(r) AO_j(r) e^{-delta |r-D_center|^2}: sum over primitive pairs of |c_l c_k overlap_abs_gauss_r12(...)|
+  END_DOC
+  ! i, j : AO indices ; D_center, delta : center and exponent of the additional Gaussian factor
+  implicit none
+  integer,          intent(in) :: i, j
+  double precision, intent(in) :: D_center(3), delta
+
+  integer                      :: power_A(3), power_B(3), l, k
+  double precision             :: A_center(3), B_center(3), alpha, beta, coef, coef1, analytical_j
+
+  double precision, external   :: overlap_abs_gauss_r12
+
+  overlap_abs_gauss_r12_ao = 0.d0
+  ! negligible absolute overlap between AO i and AO j: the result stays 0
+  if(ao_overlap_abs(j,i).lt.1.d-12) then
+    return
+  endif
+
+  power_A(1:3) = ao_power(i,1:3)
+  power_B(1:3) = ao_power(j,1:3)
+
+  A_center(1:3) = nucl_coord(ao_nucl(i),1:3)
+  B_center(1:3) = nucl_coord(ao_nucl(j),1:3)
+
+  do l = 1, ao_prim_num(i)
+    alpha = ao_expo_ordered_transp           (l,i)
+    coef1 = ao_coef_normalized_ordered_transp(l,i)
+
+    do k = 1, ao_prim_num(j)
+      beta = ao_expo_ordered_transp(k,j)
+      coef = coef1 * ao_coef_normalized_ordered_transp(k,j)
+      ! skip primitive pairs whose contraction-coefficient product is negligible
+      if(dabs(coef) .lt. 1d-12) cycle
+      ! NOTE(review): overlap_abs_gauss_r12 is defined elsewhere; presumably the primitive-level absolute integral -- confirm
+      analytical_j = overlap_abs_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
+      overlap_abs_gauss_r12_ao += dabs(coef * analytical_j)
+    enddo
+  enddo
+
+end function overlap_abs_gauss_r12_ao
+
+! --
+
 subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
 
   BEGIN_DOC
@@ -177,7 +224,7 @@ subroutine overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_
   double precision, allocatable :: analytical_j(:)
 
   resv(:) = 0.d0
-  if(ao_overlap_abs(j,i).lt.1.d-12) then
+  if(ao_overlap_abs(j,i) .lt. 1.d-12) then
     return
   endif
 
@@ -313,9 +360,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   ASSERT(beta .gt. 0.d0)
 
   if(beta .lt. 1d-10) then
-
     call overlap_gauss_r12_ao_v(D_center, LD_D, delta, i, j, resv, LD_resv, n_points)
-
     return
   endif
 
@@ -332,19 +377,20 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
   A1_center(1:3) = nucl_coord(ao_nucl(i),1:3)
   A2_center(1:3) = nucl_coord(ao_nucl(j),1:3)
 
-  allocate (fact_g(n_points), G_center(n_points,3), analytical_j(n_points) )
+  allocate(fact_g(n_points), G_center(n_points,3), analytical_j(n_points))
 
   bg  = beta  * gama_inv
   dg  = delta * gama_inv
   bdg = bg * delta 
-  do ipoint=1,n_points
+
+  do ipoint = 1, n_points
+
     G_center(ipoint,1) = bg * B_center(1) + dg * D_center(ipoint,1)
     G_center(ipoint,2) = bg * B_center(2) + dg * D_center(ipoint,2)
     G_center(ipoint,3) = bg * B_center(3) + dg * D_center(ipoint,3)
-    fact_g(ipoint) = bdg * ( &
-          (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1))  &
-        + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2))  &
-        + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
+    fact_g(ipoint) = bdg * ( (B_center(1) - D_center(ipoint,1)) * (B_center(1) - D_center(ipoint,1)) &
+                           + (B_center(2) - D_center(ipoint,2)) * (B_center(2) - D_center(ipoint,2)) &
+                           + (B_center(3) - D_center(ipoint,3)) * (B_center(3) - D_center(ipoint,3)) )
 
     if(fact_g(ipoint) < 10d0) then
       fact_g(ipoint) = dexp(-fact_g(ipoint))
@@ -368,8 +414,7 @@ subroutine overlap_gauss_r12_ao_with1s_v(B_center, beta, D_center, LD_D, delta,
       do ipoint = 1, n_points
         coef12f = coef12 * fact_g(ipoint)
         resv(ipoint) += coef12f * analytical_j(ipoint)
-      end do
-
+      enddo
     enddo
   enddo
 
diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
new file mode 100644
index 00000000..4dd87a60
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -0,0 +1,518 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+  ! Computed at each final grid point as a screened sum over the List_comb_thr_b3 1s terms and the ng_fit_jast fit Gaussians.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+  double precision              :: int_gauss, dsqpi_3_2, int_j1b
+  double precision              :: factor_ij_1s, beta_ij, center_ij_1s(3), sq_pi_3_2 
+  double precision, allocatable :: int_fit_v(:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test ...'
+  ! pi^(3/2): prefactor of the screening estimate used in the innermost loop
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points_transp j1b_pen List_comb_thr_b3_coef
+  call wall_time(wall0)
+
+  int2_grad1u2_grad2u2_j1b2_test(:,:,:) = 0.d0
+  ! only the j >= i elements are computed in the parallel region; the others are copied afterwards
+ !$OMP PARALLEL DEFAULT (NONE)                                                                              &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,                                     &
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_gauss,int_j1b,factor_ij_1s,beta_ij,center_ij_1s) &
+     !$OMP SHARED  (n_points_final_grid, ao_num, final_grid_points,List_comb_thr_b3_size,                   &
+     !$OMP          final_grid_points_transp, ng_fit_jast,                                                  &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,                                             &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,                                           &
+     !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,              &
+     !$OMP          ao_overlap_abs,sq_pi_3_2)
+ !$OMP DO SCHEDULE(dynamic)
+ do ipoint = 1, n_points_final_grid
+   r(1) = final_grid_points(1,ipoint)
+   r(2) = final_grid_points(2,ipoint)
+   r(3) = final_grid_points(3,ipoint)
+   do i = 1, ao_num
+     do j = i, ao_num
+       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+         cycle
+       endif
+  
+       do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+         ! loop over the fit Gaussians; a term is dropped when |coef_fit*factor_ij_1s*int_j1b|*pi^(3/2)*beta_ij^(-3/2) < 1d-10
+         do i_fit = 1, ng_fit_jast
+           ! NOTE(review): gaussian_product is defined elsewhere; presumably it returns the prefactor, exponent and center of the product of the Gaussian at r with the 1s at B_center -- confirm
+           expo_fit = expo_gauss_1_erf_x_2(i_fit)
+           !DIR$ FORCEINLINE
+           call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+           coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+!           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+  
+!           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
+!                 expo_fit, i, j, int_fit_v, n_points_final_grid)
+           int_gauss = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+  
+           int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) += coef_fit * int_gauss 
+  
+         enddo
+        enddo
+       enddo
+     enddo
+   enddo
+
+   !$OMP END DO
+   !$OMP END PARALLEL
+  ! copy each computed (i,j,ipoint) element, i > j, into its mirror (j,i,ipoint)
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao_num, n_points_final_grid)]
+! Variant that handles all grid points at once for each (i,j) pair through overlap_gauss_r12_ao_with1s_v
+!  BEGIN_DOC
+!  !
+!  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
+!  !
+!  END_DOC
+! Results are accumulated in big_array(ipoint,j,i) for j >= i and then copied into the provider array
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3)
+  double precision              :: tmp
+  double precision              :: wall0, wall1
+
+  double precision, allocatable :: int_fit_v(:),big_array(:,:,:)
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print*, ' providing int2_grad1u2_grad2u2_j1b2_test_v ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  call wall_time(wall0)
+ ! big_array has to be allocated before it is zeroed
+ double precision :: int_j1b
+ allocate(big_array(n_points_final_grid,ao_num, ao_num))
+ big_array(:,:,:) = 0.d0
+ !$OMP PARALLEL DEFAULT (NONE)                                       &
+     !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center,&
+     !$OMP          coef_fit, expo_fit, int_fit_v, tmp,int_j1b)                &
+     !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size,&
+     !$OMP          final_grid_points_transp, ng_fit_jast,               &
+     !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
+     !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
+     !$OMP          List_comb_thr_b3_cent, big_array,&
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+! int_fit_v is PRIVATE: each thread allocates (and later frees) its own scratch vector
+ allocate(int_fit_v(n_points_final_grid))
+ !$OMP DO SCHEDULE(dynamic)
+ do i = 1, ao_num
+   do j = i, ao_num
+
+     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       cycle
+     endif
+
+      do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+         coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+         beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+         int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+         B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+         B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+       do i_fit = 1, ng_fit_jast
+
+         expo_fit = expo_gauss_1_erf_x_2(i_fit)
+         coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
+
+         call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, size(final_grid_points_transp,1),&
+               expo_fit, i, j, int_fit_v, size(int_fit_v,1),n_points_final_grid)
+
+         do ipoint = 1, n_points_final_grid
+           big_array(ipoint,j,i) += coef_fit * int_fit_v(ipoint)
+         enddo
+
+       enddo
+
+     enddo
+   enddo
+ enddo
+ !$OMP END DO
+ deallocate(int_fit_v)
+ !$OMP END PARALLEL
+ do i = 1, ao_num
+   do j = i, ao_num
+    do ipoint = 1, n_points_final_grid
+     int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,j,i)
+    enddo
+   enddo
+  enddo
+  ! mirror: the (j,i) element with j < i is read from the computed big_array(ipoint,i,j)
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_grad1u2_grad2u2_j1b2_test_v(j,i,ipoint) = big_array(ipoint,i,j)
+      enddo
+    enddo
+  enddo
+  deallocate(big_array)
+  call wall_time(wall1)
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2_test_v', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
+  ! Computed at each final grid point as a screened sum over the List_comb_thr_b3 1s terms and the ng_fit_jast fit Gaussians.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit
+  double precision              :: coef, beta, B_center(3), tmp
+  double precision              :: wall0, wall1,int_j1b
+
+  double precision, external    :: overlap_gauss_r12_ao
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+  double precision :: factor_ij_1s,beta_ij,center_ij_1s(3),sq_pi_3_2
+
+  print*, ' providing int2_u2_j1b2_test ...'
+  ! pi^(3/2): prefactor of the screening estimate used in the innermost loop
+  sq_pi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u2_j1b2_test = 0.d0
+  ! only the j >= i elements are computed in the parallel region; the others are copied afterwards
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
+ !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        ! tmp accumulates the (j,i,ipoint) element over all retained terms
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+            ! a term is dropped when |coef_fit*coef*factor_ij_1s*int_j1b|*pi^(3/2)*beta_ij^(-3/2) < 1d-10
+            expo_fit = expo_gauss_j_mu_x_2(i_fit)
+            coef_fit = coef_gauss_j_mu_x_2(i_fit)
+            !DIR$ FORCEINLINE
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+!            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-1.5d0)).lt.1.d-10)cycle
+          
+            ! ---
+          
+              int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+          
+              tmp += coef * coef_fit * int_fit
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u2_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy each computed (i,j,ipoint) element, i > j, into its mirror (j,i,ipoint)
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u2_j1b2_test(j,i,ipoint) = int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u2_j1b2_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
+  ! Last index = Cartesian component (1:x, 2:y, 3:z) as returned in int_fit(1:3) by NAI_pol_x_mult_erf_ao_with1s.
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s, i_fit
+  double precision :: r(3), int_fit(3), expo_fit, coef_fit
+  double precision :: coef, beta, B_center(3), dist
+  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
+  double precision :: tmp_x, tmp_y, tmp_z, int_j1b
+  double precision :: wall0, wall1, sq_pi_3_2,sq_alpha
+
+  print*, ' providing int2_u_grad1u_x_j1b2_test ...'
+  ! pi^(3/2), matching the variable name and the screening estimate of the sibling *_test providers (the exponent was 1.d0)
+  sq_pi_3_2 = dacos(-1.D0)**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+
+  int2_u_grad1u_x_j1b2_test = 0.d0
+  ! only the j >= i elements are computed in the parallel region; the others are copied afterwards
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
+ !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     &
+ !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
+ !$OMP DO
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+            ! product of the fit Gaussian (centered at r) and the 1s Gaussian (centered at B_center)
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
+!            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+
+            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
+
+            tmp_x += coef_tmp * int_fit(1)
+            tmp_y += coef_tmp * int_fit(2)
+            tmp_z += coef_tmp * int_fit(3)
+          enddo
+
+          ! ---
+
+        enddo
+
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! copy each computed (i,j,ipoint,:) element, i > j, into its mirror (j,i,ipoint,:)
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,1) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,2) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2_test(j,i,ipoint,3) = int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_x_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! One value per AO pair (j,i) and per grid point ipoint (r = final_grid_points(:,ipoint)):
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
+  ! Terms whose estimated magnitude falls below the thresholds coded in the loops are skipped.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, ipoint, i_1s, i_fit
+  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
+  double precision              :: coef, beta, B_center(3), dist
+  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
+  double precision              :: wall0, wall1
+  double precision, external    :: NAI_pol_mult_erf_ao_with1s
+  double precision :: j12_mu_r12,int_j1b
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  double precision :: beta_ij,center_ij_1s(3),factor_ij_1s
+  ! NOTE(review): j12_mu_r12, sigma_ij and dist_ij_ipoint are declared but not used in this provider
+  print*, ' providing int2_u_grad1u_j1b2_test ...'
+  ! pi^(3/2), used in the magnitude estimate that screens the fit loop
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen ao_overlap_abs List_comb_thr_b3_cent
+  call wall_time(wall0)
+
+  ! start from zero: AO pairs rejected by the ao_overlap_abs_grid test below keep this value
+  int2_u_grad1u_j1b2_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
+ !$OMP          beta_ij,center_ij_1s,factor_ij_1s,               &
+ !$OMP          int_j1b,alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     &
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, &
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
+ !$OMP          ao_prod_dist_grid, ao_prod_sigma, ao_overlap_abs_grid,ao_prod_center,dsqpi_3_2,   &
+ !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,  ao_abs_comb_b3_j1b,     &
+ !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_j1b2_test)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        r(1) = final_grid_points(1,ipoint)
+        r(2) = final_grid_points(2,ipoint)
+        r(3) = final_grid_points(3,ipoint)
+        ! tmp accumulates the double sum over list entries (i_1s) and fit terms (i_fit)
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b3_size(j,i)
+          ! read the i_1s-th entry of the (j,i) list; it is skipped when |coef| * |int_j1b| < 1d-10
+          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
+          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
+                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
+                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
+          ! dist = |B_center - r|^2 (squared distance)
+          do i_fit = 1, ng_fit_jast
+            ! screen with gaussian_product output (presumably prefactor, exponent, centre of the product - confirm)
+            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
+            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
+            if(factor_ij_1s*dabs(coef*int_j1b)*dsqpi_3_2*beta_ij**(-1.5d0).lt.1.d-15)cycle
+            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
+            ! summed exponent and exponent-weighted centre of the Gaussians (beta, B_center) and (expo_fit, r)
+            alpha_1s     = beta + expo_fit
+            alpha_1s_inv = 1.d0 / alpha_1s
+            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
+            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
+            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
+            ! damping exponent; the term is dropped if it exceeds 20 or if |coef_tmp| < 1d-8
+            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 20.d0) cycle
+            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
+            if(dabs(coef_tmp) .lt. 1d-08) cycle
+            ! NOTE(review): 1.d+9 is presumably a very large mu (Coulomb limit) - confirm against the callee
+            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
+
+            tmp += coef_tmp * int_fit
+          enddo
+        enddo
+
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! the parallel loop only filled (j,i) with j >= i; copy those values into the entries with j < i
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        int2_u_grad1u_j1b2_test(j,i,ipoint) = int2_u_grad1u_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for int2_u_grad1u_j1b2_test', wall1 - wall0
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
index b7fe234f..8196614f 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
@@ -19,9 +19,11 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_grad1u2_grad2u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_grad1u2_grad2u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -51,7 +53,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += -0.25d0 * coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -88,7 +90,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
+  print*, ' wall time for int2_grad1u2_grad2u2_j1b2 =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -111,9 +113,11 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
   double precision, external    :: overlap_gauss_r12_ao
   double precision, external    :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u2_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u2_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -143,7 +147,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final
 
           int_fit = overlap_gauss_r12_ao(r, expo_fit, i, j)
           tmp += coef_fit * int_fit
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit*int_fit) .lt. 1d-12) cycle
 
           ! ---
 
@@ -186,7 +190,7 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
@@ -202,9 +206,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_x_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_x_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -241,7 +247,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
           tmp_x += coef_fit * int_fit(1)
           tmp_y += coef_fit * int_fit(2)
           tmp_z += coef_fit * int_fit(3)
-          if( (dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
+!          if( dabs(coef_fit)*(dabs(int_fit(1)) + dabs(int_fit(2)) + dabs(int_fit(3))) .lt. 3d-10 ) cycle
 
           ! ---
 
@@ -265,7 +271,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
+!            if(dabs(coef_tmp) .lt. 1d-12) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -278,9 +284,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
 
         enddo
 
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = tmp_x
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = tmp_y
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -290,15 +296,15 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_p
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+  print*, ' wall time for int2_u_grad1u_x_j1b2 = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -320,9 +326,11 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
   double precision              :: wall0, wall1
   double precision, external    :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing int2_u_grad1u_j1b2 ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   int2_u_grad1u_j1b2 = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -351,7 +359,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
           ! ---
 
           int_fit = NAI_pol_mult_erf_ao_with1s(i, j, expo_fit, r, 1.d+9, r)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(coef_fit)*dabs(int_fit) .lt. 1d-12) cycle
 
           tmp += coef_fit * int_fit
 
@@ -375,9 +383,10 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
+            if(dabs(coef_tmp) .lt. 1d-12) cycle
+
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
             tmp += coef_tmp * int_fit
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
index 6d3931f5..21927371 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_modif_vect.irp.f
@@ -241,7 +241,7 @@
 !
 !! ---
 !
-!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
+!BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (ao_num, ao_num, n_points_final_grid, 3)]
 !
 !  BEGIN_DOC
 !  !
@@ -308,7 +308,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(1,j,i,ipoint) += coef_fit * int_fit_v(ipoint,1)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,1) += coef_fit * int_fit_v(ipoint,1)
 !
 !          if(dabs(int_fit_v(ipoint,1)) .gt. 1d-10) then
 !            i_mask_grid1 += 1
@@ -320,7 +320,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(2,j,i,ipoint) += coef_fit * int_fit_v(ipoint,2)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,2) += coef_fit * int_fit_v(ipoint,2)
 !
 !          if(dabs(int_fit_v(ipoint,2)) .gt. 1d-10) then
 !            i_mask_grid2 += 1
@@ -332,7 +332,7 @@
 !
 !          ! ---
 !
-!          int2_u_grad1u_x_j1b2(3,j,i,ipoint) += coef_fit * int_fit_v(ipoint,3)
+!          int2_u_grad1u_x_j1b2(j,i,ipoint,3) += coef_fit * int_fit_v(ipoint,3)
 !
 !          if(dabs(int_fit_v(ipoint,3)) .gt. 1d-10) then
 !            i_mask_grid3 += 1
@@ -408,15 +408,15 @@
 !          call NAI_pol_x_mult_erf_ao_with1s_v(i, j, alpha_1s, centr_1s, n_points_final_grid, 1.d+9, r_mask_grid, n_points_final_grid, int_fit_v, n_points_final_grid, i_mask_grid)
 !
 !          do ipoint = 1, i_mask_grid1
-!            int2_u_grad1u_x_j1b2(1,j,i,n_mask_grid(ipoint,1)) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,1),1) += coef * dexp(-expo_coef_1s * dist(ipoint,1)) * int_fit_v(ipoint,1)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid2
-!            int2_u_grad1u_x_j1b2(2,j,i,n_mask_grid(ipoint,2)) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,2),2) += coef * dexp(-expo_coef_1s * dist(ipoint,2)) * int_fit_v(ipoint,2)
 !          enddo
 !
 !          do ipoint = 1, i_mask_grid3
-!            int2_u_grad1u_x_j1b2(3,j,i,n_mask_grid(ipoint,3)) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
+!            int2_u_grad1u_x_j1b2(j,i,n_mask_grid(ipoint,3),3) += coef * dexp(-expo_coef_1s * dist(ipoint,3)) * int_fit_v(ipoint,3)
 !          enddo
 !
 !        enddo
@@ -439,15 +439,15 @@
 !  do ipoint = 1, n_points_final_grid
 !    do i = 2, ao_num
 !      do j = 1, i-1
-!        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-!        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,1) = int2_u_grad1u_x_j1b2(i,j,ipoint,1)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,2) = int2_u_grad1u_x_j1b2(i,j,ipoint,2)
+!        int2_u_grad1u_x_j1b2(j,i,ipoint,3) = int2_u_grad1u_x_j1b2(i,j,ipoint,3)
 !      enddo
 !    enddo
 !  enddo
 !
 !  call wall_time(wall1)
-!  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
+!  print*, ' wall time for int2_u_grad1u_x_j1b2 =', wall1 - wall0
 !
 !END_PROVIDER
 !
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
new file mode 100644
index 00000000..a6a55810
--- /dev/null
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -0,0 +1,369 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! One value per AO pair (j,i) and per grid point ipoint (R = final_grid_points(:,ipoint)):
+  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1) / |r - R|
+  ! Built as sum over list entries of coef * (int_mu - int_coulomb); small entries are skipped.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_mu, int_coulomb
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp,int_j1b
+  double precision           :: wall0, wall1
+  double precision, external :: NAI_pol_mult_erf_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2
+  ! NOTE(review): sigma_ij and dist_ij_ipoint are declared but not used in this provider
+  print*, ' providing v_ij_erf_rk_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 (pi^(3/2)) is set and listed as SHARED but not read inside the loops
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+  ! start from zero: AO pairs rejected by the ao_overlap_abs_grid test below keep this value
+  v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                         &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp, int_j1b)& 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points, &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,ao_abs_comb_b2_j1b,  &
+ !$OMP          v_ij_erf_rk_cst_mu_j1b_test, mu_erf,                                   &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        ! tmp accumulates the sum over the i_1s entries of the (j,i) list
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! entries with |coef| * |int_j1b| < 1d-10 are skipped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+          ! TODO :: cycle on the 1 - erf(mur12)
+          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
+          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
+          ! the two integrals differ only by their 5th argument: mu_erf versus 1.d+9
+          tmp += coef * (int_mu - int_coulomb)
+        enddo
+
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! the parallel loop only filled (j,i) with j >= i; copy those values into the entries with j < i
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
+  END_DOC
+
+  implicit none
+  integer          :: i, j, ipoint, i_1s
+  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
+  double precision :: tmp_x, tmp_y, tmp_z
+  double precision :: wall0, wall1
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b,factor_ij_1s,beta_ij,center_ij_1s(3)
+  ! center_ij_1s is a 3-vector so the commented-out gaussian_product screening below can be re-enabled as is
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 (pi^(3/2)) is only referenced by the commented-out screening
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide expo_erfc_mu_gauss ao_prod_sigma ao_prod_center
+  call wall_time(wall0)
+  ! start from zero: AO pairs rejected by the ao_overlap_abs_grid test below keep this value
+  x_v_ij_erf_rk_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
+ !$OMP          int_j1b, tmp_x, tmp_y, tmp_z,factor_ij_1s,beta_ij,center_ij_1s)       & 
+ !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b2_size, final_grid_points,&
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo, List_comb_thr_b2_cent,  &
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b_test, mu_erf,ao_abs_comb_b2_j1b,         &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma)
+! !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2,expo_erfc_mu_gauss)
+ !$OMP DO
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        ! one accumulator per component; they end up in the 4th index (1,2,3) of the result
+        tmp_x = 0.d0
+        tmp_y = 0.d0
+        tmp_z = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! entries with |coef| * |int_j1b| < 1d-10 are skipped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          if(ao_prod_center(1,j,i).ne.10000.d0)then
+!           ! approximate 1 - erf(mu r12) by a gaussian * 10
+!           !DIR$ FORCEINLINE
+!           call gaussian_product(expo_erfc_mu_gauss,r,     &
+!                ao_prod_sigma(j,i),ao_prod_center(1,j,i),  & 
+!                factor_ij_1s,beta_ij,center_ij_1s)
+!           if(dabs(coef * factor_ij_1s*int_j1b*10.d0 * dsqpi_3_2 * beta_ij**(-1.5d0)).lt.1.d-10)cycle 
+!          endif
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
+          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
+          ! the two calls differ only by their 5th argument: mu_erf versus 1.d+9
+          tmp_x += coef * (ints(1) - ints_coulomb(1))
+          tmp_y += coef * (ints(2) - ints_coulomb(2))
+          tmp_z += coef * (ints(3) - ints_coulomb(3))
+        enddo
+
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = tmp_z
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! the parallel loop only filled (j,i) with j >= i; copy those values into the entries with j < i
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(wall1)
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+! TODO analytically
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! One value per AO pair (j,i) and per grid point ipoint (r = final_grid_points(:,ipoint)):
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
+  ! Built as a double sum over list entries (i_1s) and fit terms (i_fit); small terms are skipped.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s, i_fit
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+  print*, ' providing v_ij_u_cst_mu_j1b_test ...'
+  ! NOTE(review): dsqpi_3_2 (pi^(3/2)) is set and listed as SHARED but not read inside the loops
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+  ! start from zero: AO pairs rejected by the ao_overlap_abs_grid test below keep this value
+  v_ij_u_cst_mu_j1b_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, ng_fit_jast,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        ! tmp accumulates the double sum over list entries (i_1s) and fit terms (i_fit)
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! entries with |coef| * |int_j1b| < 1d-10 are skipped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+          do i_fit = 1, ng_fit_jast
+
+            expo_fit = expo_gauss_j_mu_x(i_fit)
+            coef_fit = coef_gauss_j_mu_x(i_fit)
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            ! skip when factor_ij_1s_u (from gaussian_product) times ao_overlap_abs_grid(j,i) is below 1d-15
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! the parallel loop only filled (j,i) with j >= i; copy those values into the entries with j < i
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  ! Single-Gaussian variant: one value per AO pair (j,i) and per grid point ipoint.
+  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12) with u(mu,r12) \approx 1/2 mu e^{-2.5 * mu (r12)^2}
+  ! The fit loop is replaced by one term with exponent expo_good_j_mu_1gauss and coef_fit = 1.d0.
+  END_DOC
+
+  implicit none
+  integer                    :: i, j, ipoint, i_1s
+  double precision           :: r(3), int_fit, expo_fit, coef_fit
+  double precision           :: coef, beta, B_center(3)
+  double precision           :: tmp
+  double precision           :: wall0, wall1
+  double precision :: beta_ij_u, factor_ij_1s_u, center_ij_1s_u(3),coeftot
+  double precision, external :: overlap_gauss_r12_ao_with1s
+  double precision :: sigma_ij,dist_ij_ipoint,dsqpi_3_2,int_j1b
+  dsqpi_3_2 = (dacos(-1.d0))**(1.5d0)
+  ! NOTE(review): dsqpi_3_2 (pi^(3/2)) is set and listed as SHARED but not read inside the loops
+  provide mu_erf final_grid_points j1b_pen
+  call wall_time(wall0)
+  ! start from zero: AO pairs rejected by the ao_overlap_abs_grid test below keep this value
+  v_ij_u_cst_mu_j1b_ng_1_test = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, i_1s,  r, coef, beta, B_center, &
+ !$OMP          beta_ij_u, factor_ij_1s_u, center_ij_1s_u,          &
+ !$OMP          coef_fit, expo_fit, int_fit, tmp,coeftot,int_j1b)                   & 
+ !$OMP SHARED  (n_points_final_grid, ao_num,  & 
+ !$OMP          final_grid_points, expo_good_j_mu_1gauss,coef_good_j_mu_1gauss,                  &
+ !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
+ !$OMP          List_comb_thr_b2_coef, List_comb_thr_b2_expo,List_comb_thr_b2_size,       & 
+ !$OMP          List_comb_thr_b2_cent, v_ij_u_cst_mu_j1b_ng_1_test,ao_abs_comb_b2_j1b,      &
+ !$OMP          ao_overlap_abs_grid,ao_prod_center,ao_prod_sigma,dsqpi_3_2)
+ !$OMP DO
+  !do ipoint = 1, 10
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = i, ao_num
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        ! tmp accumulates the sum over the i_1s entries of the (j,i) list
+        tmp = 0.d0
+        do i_1s = 1, List_comb_thr_b2_size(j,i)
+          ! entries with |coef| * |int_j1b| < 1d-10 are skipped
+          coef        = List_comb_thr_b2_coef  (i_1s,j,i)
+          beta        = List_comb_thr_b2_expo  (i_1s,j,i)
+          int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
+          B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
+          B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
+
+!          do i_fit = 1, ng_fit_jast
+            ! NOTE(review): coef_good_j_mu_1gauss is listed as SHARED but coef_fit is hard-coded to 1.d0 - confirm intent
+            expo_fit = expo_good_j_mu_1gauss
+            coef_fit = 1.d0
+            coeftot = coef * coef_fit
+            if(dabs(coeftot).lt.1.d-15)cycle
+            ! skip when factor_ij_1s_u (from gaussian_product) times ao_overlap_abs_grid(j,i) is below 1d-15
+            call gaussian_product(beta,B_center,expo_fit,r,factor_ij_1s_u,beta_ij_u,center_ij_1s_u)
+            if(factor_ij_1s_u*ao_overlap_abs_grid(j,i).lt.1.d-15)cycle
+            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+
+            tmp += coef * coef_fit * int_fit
+!          enddo
+        enddo
+
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = tmp
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! the parallel loop only filled (j,i) with j >= i; copy those values into the entries with j < i
+  do ipoint = 1, n_points_final_grid
+    do i = 2, ao_num
+      do j = 1, i-1
+        v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint) = v_ij_u_cst_mu_j1b_ng_1_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+ 
+  call wall_time(wall1)
+  print*, ' wall time for v_ij_u_cst_mu_j1b_ng_1_test', wall1 - wall0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
index 6a662533..fc30cd83 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
@@ -17,9 +17,11 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
   double precision           :: wall0, wall1
   double precision, external :: NAI_pol_mult_erf_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print *, ' providing v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                         &
@@ -49,7 +51,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_po
 
         int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
         int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-        if(dabs(int_mu - int_coulomb) .lt. 1d-10) cycle
+!        if(dabs(coef)*dabs(int_mu - int_coulomb) .lt. 1d-12) cycle
 
         tmp += coef * (int_mu - int_coulomb)
 
@@ -99,51 +101,23 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_
   ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
   END_DOC
 
-  implicit none
-  integer          :: i, j, ipoint
-  double precision :: wall0, wall1
-
-  call wall_time(wall0)
-
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
-  END_DOC
-
   implicit none
   integer          :: i, j, ipoint, i_1s
   double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
   double precision :: tmp_x, tmp_y, tmp_z
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_j1b ...'
   call wall_time(wall0)
 
-  x_v_ij_erf_rk_cst_mu_tmp_j1b = 0.d0
+  x_v_ij_erf_rk_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                                        &
  !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
  !$OMP          tmp_x, tmp_y, tmp_z)                                                  & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
  !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
- !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b, mu_erf)
+ !$OMP          x_v_ij_erf_rk_cst_mu_j1b, mu_erf)
  !$OMP DO
   !do ipoint = 1, 10
   do ipoint = 1, n_points_final_grid
@@ -169,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
         call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
 
-        if( (dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
+!        if( dabs(coef)*(dabs(ints(1)-ints_coulomb(1)) + dabs(ints(2)-ints_coulomb(2)) + dabs(ints(3)-ints_coulomb(3))) .lt. 3d-10) cycle
 
         tmp_x += coef * (ints(1) - ints_coulomb(1))
         tmp_y += coef * (ints(2) - ints_coulomb(2))
@@ -195,9 +169,9 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
 
         ! ---
 
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = tmp_x
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = tmp_y
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = tmp_z
       enddo
     enddo
   enddo
@@ -207,15 +181,15 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_
   do ipoint = 1, n_points_final_grid
     do i = 2, ao_num
       do j = 1, i-1
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2)
+        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3)
       enddo
     enddo
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b', wall1 - wall0
+  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b =', wall1 - wall0
 
 END_PROVIDER 
 
@@ -239,9 +213,11 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
 
   double precision, external :: overlap_gauss_r12_ao_with1s
 
-  provide mu_erf final_grid_points j1b_pen
+  print*, ' providing v_ij_u_cst_mu_j1b ...'
   call wall_time(wall0)
 
+  provide mu_erf final_grid_points j1b_pen
+
   v_ij_u_cst_mu_j1b = 0.d0
 
  !$OMP PARALLEL DEFAULT (NONE)                                      &
@@ -277,7 +253,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_
           B_center(3) = List_all_comb_b2_cent(3,1)
 
           int_fit = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-          if(dabs(int_fit) .lt. 1d-10) cycle
+!          if(dabs(int_fit*coef) .lt. 1d-12) cycle
 
           tmp += coef * coef_fit * int_fit
 
diff --git a/src/ao_many_one_e_ints/grad_related_ints.irp.f b/src/ao_many_one_e_ints/grad_related_ints.irp.f
index 67fb0fe7..8624e7b8 100644
--- a/src/ao_many_one_e_ints/grad_related_ints.irp.f
+++ b/src/ao_many_one_e_ints/grad_related_ints.irp.f
@@ -17,6 +17,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
 
   double precision :: NAI_pol_mult_erf_ao
 
+  print*, ' providing v_ij_erf_rk_cst_mu ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -54,7 +56,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points
   enddo
  
   call wall_time(wall1)
-  print*, ' wall time for v_ij_erf_rk_cst_mu  ', wall1 - wall0
+  print*, ' wall time for v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -73,6 +75,8 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   double precision :: wall0, wall1
   double precision :: NAI_pol_mult_erf_ao
 
+  print *, ' providing v_ij_erf_rk_cst_mu_transp ...'
+
   provide mu_erf final_grid_points 
   call wall_time(wall0)
 
@@ -107,7 +111,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_transp, (n_points_final_gr
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for v_ij_erf_rk_cst_mu_transp  ', wall1 - wall0
+  print *, ' wall time for v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -124,6 +128,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print*, ' providing x_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -162,13 +168,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp, (3, ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_points_final_grid,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -178,6 +184,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -191,7 +199,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu, (ao_num, ao_num,n_point
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -207,6 +215,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp ...'
+
   call wall_time(wall0)
 
   do ipoint = 1, n_points_final_grid
@@ -220,13 +230,13 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp, (ao_num, ao_num,
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp = ', wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
 
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid,ao_num, ao_num,3)]
+BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_final_grid, ao_num, ao_num, 3)]
 
   BEGIN_DOC
   ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/|r - R|
@@ -236,6 +246,8 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_v_ij_erf_rk_cst_mu_transp_bis ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -249,7 +261,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_transp_bis, (n_points_fi
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis', wall1 - wall0
+  print *, ' wall time for x_v_ij_erf_rk_cst_mu_transp_bis = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -268,7 +280,9 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  double precision :: r(3), ints(3), ints_coulomb(3)
  double precision :: wall0, wall1
 
- call wall_time(wall0)
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
+  call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
  !$OMP DEFAULT (NONE)                           &
@@ -295,7 +309,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_fin
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -315,6 +329,8 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
   do i = 1, ao_num
     do j = 1, ao_num
@@ -327,7 +343,7 @@ BEGIN_PROVIDER [ double precision, d_dx_v_ij_erf_rk_cst_mu, (n_points_final_grid
   enddo
 
   call wall_time(wall1)
-  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -348,6 +364,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
   double precision :: r(3), ints(3), ints_coulomb(3)
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu_tmp ...'
+
   call wall_time(wall0)
 
  !$OMP PARALLEL                                 &
@@ -375,7 +393,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu_tmp, (3, n_points_f
  !$OMP END PARALLEL
 
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu_tmp = ', wall1 - wall0
 
 END_PROVIDER 
 
@@ -395,6 +413,8 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing x_d_dx_v_ij_erf_rk_cst_mu ...'
+
   call wall_time(wall0)
 
   do i = 1, ao_num
@@ -408,7 +428,7 @@ BEGIN_PROVIDER [ double precision, x_d_dx_v_ij_erf_rk_cst_mu, (n_points_final_gr
   enddo
  
   call wall_time(wall1)
-  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu', wall1 - wall0
+  print *, ' wall time for x_d_dx_v_ij_erf_rk_cst_mu = ', wall1 - wall0
 
 END_PROVIDER 
 
diff --git a/src/ao_many_one_e_ints/list_grid.irp.f b/src/ao_many_one_e_ints/list_grid.irp.f
new file mode 100644
index 00000000..ccdc33ad
--- /dev/null
+++ b/src/ao_many_one_e_ints/list_grid.irp.f
@@ -0,0 +1,59 @@
+ BEGIN_PROVIDER [ integer, n_pts_grid_ao_prod, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_n_pts_grid_ao_prod]
+ implicit none
+ BEGIN_DOC
+ ! n_pts_grid_ao_prod(j,i) = number of points r of final_grid_points kept for the AO pair (j,i).
+ ! A point is kept when both conditions hold:
+ !   fact_gauss * ao_overlap_abs_grid(j,i) >= 1.d-11 (fact_gauss is returned by gaussian_product)
+ !   |overlap_abs_gauss_r12_ao| (same nucleus) or |overlap_gauss_r12_ao| (otherwise) >= thr
+ !
+ ! max_n_pts_grid_ao_prod = largest entry of n_pts_grid_ao_prod.
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: overlap, r(3),thr, overlap_abs_gauss_r12_ao,overlap_gauss_r12_ao
+ double precision :: sigma,center_ij(3),fact_gauss, alpha, center(3)
+ n_pts_grid_ao_prod = 0
+ thr = 1.d-11
+ print*,' expo_good_j_mu_1gauss = ',expo_good_j_mu_1gauss
+ ! thr is assigned before the parallel region and only read inside it, so it has to be SHARED:
+ ! listed as PRIVATE, every thread would compare against an undefined copy.
+ !$OMP PARALLEL DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (ipoint, i, j, r, overlap, fact_gauss, alpha, center, sigma, center_ij) &
+ !$OMP SHARED  (n_points_final_grid, ao_num, ao_overlap_abs_grid,n_pts_grid_ao_prod,expo_good_j_mu_1gauss,&
+ !$OMP          final_grid_points,ao_prod_center,ao_prod_sigma,ao_nucl,thr)
+ !$OMP DO
+ do i = 1, ao_num
+  do j = 1, ao_num
+   center_ij(1:3) = ao_prod_center(1:3,j,i)
+   ! sigma <- 0.5 / ao_prod_sigma(j,i)**2 (presumably a Gaussian exponent for gaussian_product - confirm)
+   sigma = ao_prod_sigma(j,i)
+   sigma *= sigma
+   sigma = 0.5d0 /sigma
+   do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+    call gaussian_product(sigma, center_ij, expo_good_j_mu_1gauss, r, fact_gauss, alpha, center)
+    ! first screening: skips the overlap evaluation below when the product is already tiny
+    if( fact_gauss*ao_overlap_abs_grid(j,i).lt.1.d-11)cycle
+    ! second screening: the overlap routine depends on whether both AOs sit on the same nucleus
+    if(ao_nucl(i) == ao_nucl(j))then
+     overlap = overlap_abs_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    else
+     overlap = overlap_gauss_r12_ao(r, expo_good_j_mu_1gauss, i, j)
+    endif
+    if(dabs(overlap).lt.thr)cycle
+    n_pts_grid_ao_prod(j,i) += 1
+   enddo
+  enddo
+ enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ integer :: list(ao_num)
+ ! maximum of each column first, then the global maximum
+ do i = 1, ao_num
+  list(i) = maxval(n_pts_grid_ao_prod(:,i))
+ enddo
+ max_n_pts_grid_ao_prod = maxval(list) 
+END_PROVIDER 
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
index 0b40170c..e27bf723 100644
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ b/src/ao_many_one_e_ints/listj1b.irp.f
@@ -102,6 +102,12 @@ END_PROVIDER
     List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i))
   enddo
 
+  print *, ' coeff, expo & cent of list b2'
+  do i = 1, List_all_comb_b2_size
+    print*, i, List_all_comb_b2_coef(i), List_all_comb_b2_expo(i)
+    print*, List_all_comb_b2_cent(1,i), List_all_comb_b2_cent(2,i), List_all_comb_b2_cent(3,i)
+  enddo
+
 END_PROVIDER
 
 ! ---
@@ -168,7 +174,6 @@ END_PROVIDER
 
     do j = 1, nucl_num
       tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-      !print*, List_all_comb_b3(j,i), j1b_pen(j)
       List_all_comb_b3_expo(i)   += tmp_alphaj
       List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
       List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
@@ -220,9 +225,11 @@ END_PROVIDER
     List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i))
   enddo
 
-  print *, ' 1st coeff & expo of lists'
-  print*, List_all_comb_b2_coef(1), List_all_comb_b2_expo(1)
-  print*, List_all_comb_b3_coef(1), List_all_comb_b3_expo(1)
+  print *, ' coeff, expo & cent of list b3'
+  do i = 1, List_all_comb_b3_size
+    print*, i, List_all_comb_b3_coef(i), List_all_comb_b3_expo(i)
+    print*, List_all_comb_b3_cent(1,i), List_all_comb_b3_cent(2,i), List_all_comb_b3_cent(3,i)
+  enddo
 
 END_PROVIDER
 
diff --git a/src/ao_many_one_e_ints/listj1b_sorted.irp.f b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
new file mode 100644
index 00000000..bf493fbb
--- /dev/null
+++ b/src/ao_many_one_e_ints/listj1b_sorted.irp.f
@@ -0,0 +1,191 @@
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b2_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b2_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15 ! cut applied to |coef| * |int_j1b|
+ List_comb_thr_b2_size = 0 ! entry (j,i) counts the terms of the b2 list kept for the pair (j,i)
+ do i = 1, ao_num
+  do j = i, ao_num ! only j >= i is computed here; the other triangle is copied after these loops
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     if(dabs(coef).lt.1.d-15)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     beta = max(beta,1.d-12) ! floor on the exponent
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0 ! accumulates weight * |ao_i(r) * ao_j(r)| * exp(-beta * dist) over the extra grid
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) ) ! dist = squared distance between center and r
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b2_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num ! copy the computed j >= i counts into the j < i entries
+  do j = 1, i-1
+    List_comb_thr_b2_size(j,i) = List_comb_thr_b2_size(i,j)
+  enddo
+ enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b2_size(:,i))
+ enddo
+ max_List_comb_thr_b2_size = maxval(list) 
+ ! max_List_comb_thr_b2_size now holds the largest entry of List_comb_thr_b2_size
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b2_coef, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_expo, (   max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b2_cent, (3, max_List_comb_thr_b2_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b2_j1b, ( max_List_comb_thr_b2_size ,ao_num, ao_num)]
+ implicit none
+ ! Screened b2 list per AO pair (j,i): slots 1..icount hold coef, expo, center and the screening integral.
+ ! Pairs with j >= i are computed; coef/expo/cent are then mirrored to j < i (ao_abs_comb_b2_j1b is not).
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr,r(3),weight,dist
+ thr = 1.d-15
+ ao_abs_comb_b2_j1b = 10000000.d0
+ do i = 1, ao_num
+  do j = i, ao_num
+   icount = 0
+   do i_1s = 1, List_all_comb_b2_size
+     coef        = List_all_comb_b2_coef  (i_1s)
+     ! same coefficient cut as in List_comb_thr_b2_size, so that every counted term is also stored
+     if(dabs(coef).lt.thr)cycle
+     beta        = List_all_comb_b2_expo  (i_1s)
+     center(1:3) = List_all_comb_b2_cent(1:3,i_1s)
+     int_j1b = 0.d0
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) )
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b2_coef(icount,j,i) = coef
+      List_comb_thr_b2_expo(icount,j,i) = beta
+      List_comb_thr_b2_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b2_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+ do i = 1, ao_num
+  do j = 1, i-1
+    do icount = 1, List_comb_thr_b2_size(j,i)
+     List_comb_thr_b2_coef(icount,j,i) = List_comb_thr_b2_coef(icount,i,j)
+     List_comb_thr_b2_expo(icount,j,i) = List_comb_thr_b2_expo(icount,i,j)
+     List_comb_thr_b2_cent(1:3,icount,j,i) = List_comb_thr_b2_cent(1:3,icount,i,j)
+    enddo
+  enddo
+ enddo
+END_PROVIDER 
+
+
+ BEGIN_PROVIDER [ integer, List_comb_thr_b3_size, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ integer, max_List_comb_thr_b3_size]
+ implicit none
+ integer :: i_1s,i,j,ipoint
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15 ! cut applied to |coef| and to |coef| * |int_j1b|
+ List_comb_thr_b3_size = 0 ! entry (j,i) counts the terms of the b3 list kept for the pair (j,i)
+ do i = 1, ao_num
+  do j = 1, ao_num ! full (j,i) range: the symmetry copy further down is commented out
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0 ! accumulates weight * |ao_i(r) * ao_j(r)| * exp(-beta * dist) over the extra grid
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) ) ! dist = squared distance between center and r
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      List_comb_thr_b3_size(j,i) += 1
+     endif
+   enddo
+  enddo 
+ enddo
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    List_comb_thr_b3_size(j,i) = List_comb_thr_b3_size(i,j)
+!  enddo
+! enddo
+ integer :: list(ao_num)
+ do i = 1, ao_num
+  list(i) = maxval(List_comb_thr_b3_size(:,i))
+ enddo
+ max_List_comb_thr_b3_size = maxval(list) 
+ print*,'max_List_comb_thr_b3_size =  ',max_List_comb_thr_b3_size
+ ! max_List_comb_thr_b3_size holds the largest entry of List_comb_thr_b3_size
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, List_comb_thr_b3_coef, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_expo, (   max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, List_comb_thr_b3_cent, (3, max_List_comb_thr_b3_size,ao_num, ao_num )]
+&BEGIN_PROVIDER [ double precision, ao_abs_comb_b3_j1b, ( max_List_comb_thr_b3_size ,ao_num, ao_num)]
+ implicit none
+ integer :: i_1s,i,j,ipoint,icount
+ double precision :: coef,beta,center(3),int_j1b,thr
+ double precision :: r(3),weight,dist
+ thr = 1.d-15 ! cut applied to |coef| and to |coef| * |int_j1b|
+ ao_abs_comb_b3_j1b = 10000000.d0 ! slots that are never filled below keep this value
+ do i = 1, ao_num
+  do j = 1, ao_num ! full (j,i) range: the symmetry copy further down is commented out
+   icount = 0 ! number of b3 terms stored so far for the pair (j,i)
+   do i_1s = 1, List_all_comb_b3_size
+     coef        = List_all_comb_b3_coef  (i_1s)
+     beta        = List_all_comb_b3_expo  (i_1s)
+     beta = max(beta,1.d-12) ! floor on the exponent; the floored value is what gets stored
+     center(1:3) = List_all_comb_b3_cent(1:3,i_1s)
+     if(dabs(coef).lt.thr)cycle
+     int_j1b = 0.d0 ! accumulates weight * |ao_i(r) * ao_j(r)| * exp(-beta * dist) over the extra grid
+     do ipoint = 1, n_points_extra_final_grid
+      r(1:3) = final_grid_points_extra(1:3,ipoint)
+      weight = final_weight_at_r_vector_extra(ipoint)
+      dist  = ( center(1) - r(1) )*( center(1) - r(1) ) ! dist = squared distance between center and r
+      dist += ( center(2) - r(2) )*( center(2) - r(2) )
+      dist += ( center(3) - r(3) )*( center(3) - r(3) )
+      int_j1b += dabs(aos_in_r_array_extra_transp(ipoint,i) * aos_in_r_array_extra_transp(ipoint,j))*dexp(-beta*dist) * weight
+     enddo
+     if(dabs(coef)*dabs(int_j1b).gt.thr)then
+      icount += 1
+      List_comb_thr_b3_coef(icount,j,i) = coef
+      List_comb_thr_b3_expo(icount,j,i) = beta
+      List_comb_thr_b3_cent(1:3,icount,j,i) = center(1:3)
+      ao_abs_comb_b3_j1b(icount,j,i) = int_j1b
+     endif
+   enddo
+  enddo 
+ enddo
+! Symmetry copy kept for reference; it is disabled because the loops above cover every (j,i).
+! do i = 1, ao_num
+!  do j = 1, i-1
+!    do icount = 1, List_comb_thr_b3_size(j,i)
+!     List_comb_thr_b3_coef(icount,j,i) = List_comb_thr_b3_coef(icount,i,j)
+!     List_comb_thr_b3_expo(icount,j,i) = List_comb_thr_b3_expo(icount,i,j)
+!     List_comb_thr_b3_cent(1:3,icount,j,i) = List_comb_thr_b3_cent(1:3,icount,i,j)
+!    enddo
+!  enddo
+! enddo
+ 
+END_PROVIDER 
+
diff --git a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
index cfdaf95f..54c2d95b 100644
--- a/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_gauss_gauss.irp.f
@@ -1,5 +1,9 @@
-double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+! ---
+
+double precision function overlap_gauss_r12(D_center, delta, A_center, B_center, power_A, power_B, alpha, beta)
+
   BEGIN_DOC
+  !
   ! Computes the following integral :
   !
   ! .. math                      ::
@@ -8,6 +12,72 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   !
   END_DOC
 
+  include 'constants.include.F'
+
+  implicit none
+  double precision, intent(in) :: D_center(3), delta  ! pure gaussian "D"
+  double precision, intent(in) :: A_center(3),B_center(3),alpha,beta ! gaussian/polynoms "A" and "B"
+  integer, intent(in)          :: power_A(3),power_B(3)
+
+  double precision             :: overlap_x,overlap_y,overlap_z,overlap
+  ! First you multiply the usual gaussian "A" with the gaussian exp(-delta (r - D)^2 )
+  double precision             :: A_new(0:max_dim,3)! new polynom
+  double precision             :: A_center_new(3)   ! new center
+  integer                      :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+  double precision             :: alpha_new         ! new exponent
+  double precision             :: fact_a_new        ! constant factor
+  double precision             :: accu, coefx, coefy, coefz, coefxy, coefxyz, thr
+  integer                      :: d(3), i, lx, ly, lz, iorder_tmp(3), dim1
+
+  dim1 = 100
+  thr  = 1.d-10
+  d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_gauss_r12 = 0.d0
+
+  ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
+  call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
+      delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
+  ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
+  accu = 0.d0
+  do lx = 0, iorder_a_new(1)
+    coefx = A_new(lx,1)*fact_a_new
+    if(dabs(coefx).lt.thr)cycle
+    iorder_tmp(1) = lx
+
+    do ly = 0, iorder_a_new(2)
+      coefy  = A_new(ly,2)
+      coefxy = coefx * coefy
+      if(dabs(coefxy) .lt. thr) cycle
+      iorder_tmp(2) = ly
+
+      do lz = 0, iorder_a_new(3)
+        coefz   = A_new(lz,3)
+        coefxyz = coefxy * coefz
+        if(dabs(coefxyz) .lt. thr) cycle
+        iorder_tmp(3) = lz
+
+        call overlap_gaussian_xyz( A_center_new, B_center, alpha_new, beta, iorder_tmp, power_B &
+                                 , overlap_x, overlap_y, overlap_z, overlap, dim1)
+
+        accu += coefxyz * overlap
+      enddo
+    enddo
+  enddo
+  overlap_gauss_r12 = accu
+end
+
+!---
+double precision function overlap_abs_gauss_r12(D_center,delta,A_center,B_center,power_A,power_B,alpha,beta)
+  BEGIN_DOC
+  ! Computes the following integral :
+  !
+  ! .. math                      ::
+  !
+  !   \int dr exp(-delta (r - D)^2 ) |(x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )|
+  !
+  END_DOC
+
   implicit none
   include 'constants.include.F'
   double precision, intent(in)   :: D_center(3), delta  ! pure gaussian "D"
@@ -21,20 +91,23 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
   integer                        :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
   double precision               :: alpha_new         ! new exponent
   double precision               :: fact_a_new        ! constant factor
-  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr
+  double precision               :: accu,coefx,coefy,coefz,coefxy,coefxyz,thr,dx,lower_exp_val
   integer                        :: d(3),i,lx,ly,lz,iorder_tmp(3),dim1
-  dim1=100
-  thr = 1.d-10
+  dim1=50
+  lower_exp_val = 40.d0
+  thr = 1.d-12
   d(:) = 0 ! order of the polynom for the gaussian exp(-delta (r - D)^2 )  == 0
+  overlap_abs_gauss_r12 = 0.d0
 
   ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order
   call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new ,&
       delta,alpha,d,power_A,D_center,A_center,n_pt_max_integrals)
+  if(fact_a_new.lt.thr)return
   ! The new gaussian exp(-delta (r - D)^2 ) (x-A_x)^a \exp(-\alpha (x-A_x)^2
   accu = 0.d0
   do lx = 0, iorder_a_new(1)
-    coefx = A_new(lx,1)
-    if(dabs(coefx).lt.thr)cycle
+    coefx = A_new(lx,1)*fact_a_new
+!    if(dabs(coefx).lt.thr)cycle
     iorder_tmp(1) = lx
     do ly = 0, iorder_a_new(2)
       coefy = A_new(ly,2)
@@ -46,12 +119,14 @@ double precision function overlap_gauss_r12(D_center,delta,A_center,B_center,pow
         coefxyz = coefxy * coefz
         if(dabs(coefxyz).lt.thr)cycle
         iorder_tmp(3) = lz
-        call overlap_gaussian_xyz(A_center_new,B_center,alpha_new,beta,iorder_tmp,power_B,overlap_x,overlap_y,overlap_z,overlap,dim1)
-        accu += coefxyz * overlap
+        call overlap_x_abs(A_center_new(1),B_center(1),alpha_new,beta,iorder_tmp(1),power_B(1),overlap_x,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(2),B_center(2),alpha_new,beta,iorder_tmp(2),power_B(2),overlap_y,lower_exp_val,dx,dim1)
+        call overlap_x_abs(A_center_new(3),B_center(3),alpha_new,beta,iorder_tmp(3),power_B(3),overlap_z,lower_exp_val,dx,dim1)
+        accu += dabs(coefxyz * overlap_x * overlap_y * overlap_z)
       enddo
     enddo
   enddo
-  overlap_gauss_r12 = fact_a_new * accu
+  overlap_abs_gauss_r12= accu
 end
 
 !---
@@ -95,11 +170,9 @@ subroutine overlap_gauss_r12_v(D_center, LD_D, delta, A_center, B_center, power_
 
   maxab = maxval(power_A(1:3))
 
-  allocate(A_new(n_points, 0:maxab, 3), A_center_new(n_points, 3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
+  allocate(A_new(n_points,0:maxab,3), A_center_new(n_points,3), fact_a_new(n_points), iorder_a_new(3), overlap(n_points))
 
-  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new,   &
-        alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, &
-        D_center, LD_D, A_center, n_points)
+  call give_explicit_poly_and_gaussian_v(A_new, maxab, A_center_new, alpha_new, fact_a_new, iorder_a_new, delta, alpha, d, power_A, D_center, LD_D, A_center, n_points)
 
   rvec(:) = 0.d0
 
diff --git a/src/ao_tc_eff_map/fit_j.irp.f b/src/ao_tc_eff_map/fit_j.irp.f
index 8fad9079..4730d003 100644
--- a/src/ao_tc_eff_map/fit_j.irp.f
+++ b/src/ao_tc_eff_map/fit_j.irp.f
@@ -1,5 +1,40 @@
+ BEGIN_PROVIDER [ double precision, expo_j_xmu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_j_xmu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Exponent and coefficient of a single Gaussian used as an upper-bound, long-range fit of
+ !
+ !   F(x) = x * (1 - erf(x)) - 1/sqrt(pi) * exp(-x**2)
+ !
+ ! Such a Gaussian can be used to screen integrals involving F(x).
+ END_DOC
+ expo_j_xmu_1gauss  = 0.5d0
+ coef_j_xmu_1gauss  = 1.d0
+END_PROVIDER 
 ! ---
 
+BEGIN_PROVIDER [ double precision, expo_erfc_gauss ]
+ implicit none 
+ expo_erfc_gauss = 1.41211d0 ! NOTE(review): presumably the exponent of a Gaussian approximating erfc(x) - confirm and cite the fit
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, expo_erfc_mu_gauss ]
+ implicit none 
+ expo_erfc_mu_gauss = expo_erfc_gauss * mu_erf * mu_erf ! expo_erfc_gauss scaled by mu_erf**2
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, expo_good_j_mu_1gauss ]
+&BEGIN_PROVIDER [ double precision, coef_good_j_mu_1gauss ]
+ implicit none
+ BEGIN_DOC
+ ! Exponent and coefficient of a single Gaussian meant to give an upper bound of J(r12,mu),
+ ! built from expo_j_xmu_1gauss, coef_j_xmu_1gauss and mu_erf.
+ ! Can be used to screen integrals with J(r12,mu)
+ END_DOC
+ expo_good_j_mu_1gauss = 2.D0 * mu_erf * expo_j_xmu_1gauss ! NOTE(review): linear in mu_erf, whereas the other fits scale exponents by mu_erf**2 - confirm
+ coef_good_j_mu_1gauss = 0.5d0/mu_erf * coef_j_xmu_1gauss
+ END_PROVIDER 
+
 BEGIN_PROVIDER [ double precision, expo_j_xmu, (n_fit_1_erf_x) ]
  implicit none
  BEGIN_DOC
@@ -88,6 +123,36 @@ END_PROVIDER
       expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x = (/ -0.01756495d0 , -0.01023623d0  , -0.06548959d0  , -0.03539446d0  , -0.17150646d0  , -0.15071096d0  , -0.11326834d0   /)
+    expo_gauss_j_mu_x = (/ 9.88572565d+02,  1.21363371d+04,  3.69794870d+01,  1.67364529d+02,  3.03962934d+00,  1.27854005d+00,  9.76383343d+00 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x = (/ -0.11489205d0 , -0.16008968d0 , -0.12892456d0 , -0.04250838d0 , -0.0718451d0  , -0.02394051d0 , -0.00913353d0 , -0.01285182d0  /)
+    expo_gauss_j_mu_x = (/ 6.97632442d+00, 2.56010878d+00, 1.22760977d+00, 7.47697124d+01, 2.16104215d+01, 2.96549728d+02, 1.40773328d+04, 1.43335159d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x = (/ /)
+  !  expo_gauss_j_mu_x = (/ /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x(i) = tmp * expo_gauss_j_mu_x(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -189,6 +254,36 @@ END_PROVIDER
       expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.05202849d0  , 0.01031081d0  , 0.04699157d0  , 0.01451002d0  , 0.07442576d0  , 0.02692033d0  , 0.09311842d0   /)
+    expo_gauss_j_mu_x_2 = (/ 3.04469415d+00, 1.40682034d+04, 7.45960945d+01, 1.43067466d+03, 2.16815661d+01, 2.95750306d+02, 7.23471236d+00 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_x_2 = (/ 0.00942115d0  , 0.07332421d0  , 0.0508308d0   , 0.08204949d0  , 0.0404099d0   , 0.03201288d0  , 0.01911313d0  , 0.01114732d0   /)
+    expo_gauss_j_mu_x_2 = (/ 1.56957321d+04, 1.52867810d+01, 4.36016903d+01, 5.96818956d+00, 2.85535269d+00, 1.36064008d+02, 4.71968910d+02, 1.92022350d+03 /)
+    
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_x_2 = (/  /)
+  !  expo_gauss_j_mu_x_2 = (/  /)
+  !  
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_x_2(i) = tmp * expo_gauss_j_mu_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
@@ -293,6 +388,36 @@ END_PROVIDER
       expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.11853067d0 , -0.01522824d0  , -0.07419098d0  , -0.022202d0    , -0.12242283d0  , -0.04177571d0  , -0.16983107d0  /)
+    expo_gauss_j_mu_1_erf = (/ 2.74057056d+00,  1.37626591d+04,  6.65578663d+01,  1.34693031d+03,  1.90547699d+01,  2.69445390d+02,  6.31845879d+00/)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_j_mu_1_erf = (/ -0.12263328d0 , -0.04965255d0 , -0.15463564d0 , -0.09675781d0 , -0.0807023d0  , -0.02923298d0 , -0.01381381d0 , -0.01675923d0  /)
+    expo_gauss_j_mu_1_erf = (/ 1.36101994d+01, 1.24908367d+02, 5.29061388d+00, 2.60692516d+00, 3.93396935d+01, 4.43071610d+02, 1.54902240d+04, 1.85170446d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_j_mu_1_erf = (/  /)
+  !  expo_gauss_j_mu_1_erf = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_j_mu_1_erf(i) = tmp * expo_gauss_j_mu_1_erf(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/ao_tc_eff_map/potential.irp.f b/src/ao_tc_eff_map/potential.irp.f
index 67d572e5..5b72b567 100644
--- a/src/ao_tc_eff_map/potential.irp.f
+++ b/src/ao_tc_eff_map/potential.irp.f
@@ -1,59 +1,79 @@
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot]
- implicit none
- BEGIN_DOC
-! number of gaussians to represent the effective potential :
-!
-! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
-!
-! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot = n_max_fit_slat + 1
+
+  BEGIN_DOC
+  ! number of gaussians to represent the effective potential :
+  !
+  ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + mu/\sqrt(pi) * exp(-(mu*r12)^2)
+  !
+  ! Here (1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+
+  n_gauss_eff_pot = ng_fit_jast + 1
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [integer, n_gauss_eff_pot_deriv]
- implicit none
- BEGIN_DOC
-! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- n_gauss_eff_pot_deriv = n_max_fit_slat 
+
+  BEGIN_DOC
+  ! V(r12) = -(1 - erf(mu*r12))^2 is expanded in Gaussians as Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  implicit none
+  n_gauss_eff_pot_deriv = ng_fit_jast
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [double precision, expo_gauss_eff_pot, (n_gauss_eff_pot)]
 &BEGIN_PROVIDER [double precision, coef_gauss_eff_pot, (n_gauss_eff_pot)]
- implicit none
- BEGIN_DOC
-! Coefficients and exponents of the Fit on Gaussians of V(X) = -(1 - erf(mu*X))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*X)^2)
-!
-! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
-!
-! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
- END_DOC
- include 'constants.include.F'
 
- integer :: i
- ! fit of the -0.25 * (1 - erf(mu*x))^2 with n_max_fit_slat gaussians 
- do i = 1, n_max_fit_slat
-  expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
-  coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
- enddo
- ! Analytical Gaussian part of the potential: + 1/(\sqrt(pi)mu) * exp(-(mu*x)^2) 
- expo_gauss_eff_pot(n_max_fit_slat+1) = mu_erf * mu_erf
- coef_gauss_eff_pot(n_max_fit_slat+1) =  1.d0 * mu_erf * inv_sq_pi
+  BEGIN_DOC
+  ! Coefficients and exponents of the Gaussian fit of V(X) = -0.25 * (1 - erf(mu*X))^2 + mu/\sqrt(pi) * exp(-(mu*X)^2)
+  !
+  ! V(X) = \sum_{i=1,n_gauss_eff_pot} coef_gauss_eff_pot(i) * exp(-expo_gauss_eff_pot(i) * X^2)
+  !
+  ! Relies on the fit proposed in Eqs A11-A20 in JCP 154, 084119 (2021)
+  END_DOC
+
+  include 'constants.include.F'
+
+  implicit none
+  integer :: i
+ 
+  ! fit of -0.25 * (1 - erf(mu*x))^2 with ng_fit_jast gaussians
+  do i = 1, ng_fit_jast
+   expo_gauss_eff_pot(i) = expo_gauss_1_erf_x_2(i) 
+   coef_gauss_eff_pot(i) = -0.25d0 * coef_gauss_1_erf_x_2(i) ! -1/4 * (1 - erf(mu*x))^2
+  enddo
+
+  ! Analytical Gaussian part of the potential: + mu/\sqrt(pi) * exp(-(mu*x)^2)
+  expo_gauss_eff_pot(ng_fit_jast+1) = mu_erf * mu_erf
+  coef_gauss_eff_pot(ng_fit_jast+1) =  1.d0 * mu_erf * inv_sq_pi
 
 END_PROVIDER 
 
+! ---
+
+double precision function eff_pot_gauss(x, mu)
+
+  BEGIN_DOC
+  ! V(mu,x) = -0.25 * (1 - erf(mu*x))^2 + mu/\sqrt(pi) * exp(-(mu*x)^2)
+  END_DOC
+
+  implicit none
+  double precision, intent(in) :: x, mu
+
+  eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 
-double precision function eff_pot_gauss(x,mu)
- implicit none
- BEGIN_DOC
- ! V(mu,r12) = -0.25 * (1 - erf(mu*r12))^2 + 1/(\sqrt(pi)mu) * exp(-(mu*r12)^2)
- END_DOC
- double precision, intent(in) :: x,mu
- eff_pot_gauss =  mu/dsqrt(dacos(-1.d0)) * dexp(-mu*mu*x*x) - 0.25d0 * (1.d0 - derf(mu*x))**2.d0
 end
 
-
-
 ! -------------------------------------------------------------------------------------------------
 ! ---
 
@@ -129,16 +149,19 @@ END_PROVIDER
 ! ---
 
 double precision function fit_1_erf_x(x)
- implicit none
- double precision, intent(in) :: x
- BEGIN_DOC
-! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
- END_DOC
- integer :: i
- fit_1_erf_x = 0.d0
- do i = 1, n_max_fit_slat
-  fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
- enddo
+
+  BEGIN_DOC
+  ! fit_1_erf_x(x) = \sum_i c_i exp (-alpha_i x^2) \approx (1 - erf(mu*x))
+  END_DOC
+
+  implicit none
+  integer :: i
+  double precision, intent(in) :: x
+
+  fit_1_erf_x = 0.d0
+  do i = 1, n_max_fit_slat
+    fit_1_erf_x += dexp(-expo_gauss_1_erf_x(i) *x*x) * coef_gauss_1_erf_x(i)
+  enddo
 
 end
 
@@ -165,7 +188,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6.23519457d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -175,7 +198,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 55.39184787d0, 3.92151407d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -185,7 +208,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 19.90272209d0, 3.2671671d0 , 336.47320445d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -195,7 +218,7 @@ end
     expo_gauss_1_erf_x_2 = (/ 6467.28126d0, 46.9071990d0, 9.09617721d0, 2.76883328d0, 360.367093d0 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
@@ -205,10 +228,40 @@ end
     expo_gauss_1_erf_x_2 = (/ 2.54293498d+01, 1.40317872d+02, 7.14630801d+00, 2.65517675d+00, 1.45142619d+03, 1.00000000d+04 /)
 
     tmp = mu_erf * mu_erf
-    do i = 1, n_max_fit_slat
+    do i = 1, ng_fit_jast
       expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
     enddo
 
+  elseif(ng_fit_jast .eq. 7) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.0213619d0   , 0.03221511d0  , 0.29966689d0  , 0.19178934d0  , 0.06154732d0  , 0.28214555d0  , 0.11125985d0   /)
+    expo_gauss_1_erf_x_2 = (/ 1.34727067d+04, 1.27166613d+03, 5.52584567d+00, 1.67753218d+01, 2.46145691d+02, 2.47971820d+00, 5.95141293d+01 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  elseif(ng_fit_jast .eq. 8) then
+
+    coef_gauss_1_erf_x_2 = (/ 0.28189124d0  , 0.19518669d0  , 0.12161735d0  , 0.24257438d0  , 0.07309656d0  , 0.042435d0    , 0.01926109d0  , 0.02393415d0   /)
+    expo_gauss_1_erf_x_2 = (/ 4.69795903d+00, 1.21379451d+01, 3.55527053d+01, 2.39227172d+00, 1.14827721d+02, 4.16320213d+02, 1.52813587d+04, 1.78516557d+03 /)
+
+    tmp = mu_erf * mu_erf
+    do i = 1, ng_fit_jast
+      expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+    enddo
+
+  !elseif(ng_fit_jast .eq. 9) then
+
+  !  coef_gauss_1_erf_x_2 = (/  /)
+  !  expo_gauss_1_erf_x_2 = (/  /)
+
+  !  tmp = mu_erf * mu_erf
+  !  do i = 1, ng_fit_jast
+  !    expo_gauss_1_erf_x_2(i) = tmp * expo_gauss_1_erf_x_2(i)
+  !  enddo
+
   elseif(ng_fit_jast .eq. 20) then
 
     ASSERT(n_max_fit_slat == 20)
diff --git a/src/bi_ort_ints/semi_num_ints_mo.irp.f b/src/bi_ort_ints/semi_num_ints_mo.irp.f
index 33f512cf..4694a998 100644
--- a/src/bi_ort_ints/semi_num_ints_mo.irp.f
+++ b/src/bi_ort_ints/semi_num_ints_mo.irp.f
@@ -107,50 +107,69 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_transp, (ao_num, ao_num, 3,
   integer          :: i, j, ipoint
   double precision :: wall0, wall1
 
+  print *, ' providing int2_grad1_u12_ao_transp ...'
   call wall_time(wall0)
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(1,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(2,j,i,ipoint)
-        int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
+
+  if(test_cycle_tc)then
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao_test(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  else
+   do ipoint = 1, n_points_final_grid
+     do i = 1, ao_num
+       do j = 1, ao_num
+         int2_grad1_u12_ao_transp(j,i,1,ipoint) = int2_grad1_u12_ao(j,i,ipoint,1)
+         int2_grad1_u12_ao_transp(j,i,2,ipoint) = int2_grad1_u12_ao(j,i,ipoint,2)
+         int2_grad1_u12_ao_transp(j,i,3,ipoint) = int2_grad1_u12_ao(j,i,ipoint,3)
+       enddo
+     enddo
+   enddo
+  endif
   call wall_time(wall1)
   print *, ' wall time for int2_grad1_u12_ao_transp ', wall1 - wall0
 
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_transp, (mo_num, mo_num, 3, n_points_final_grid)]
 
   implicit none
   integer :: ipoint
+  double precision :: wall0, wall1
 
- print*,'providing int2_grad1_u12_bimo_transp'
- double precision :: wall0, wall1
- call wall_time(wall0)
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
- !$OMP DO SCHEDULE (dynamic)
-  do ipoint = 1, n_points_final_grid
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
-                          , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
- call wall_time(wall1)
- print*,'Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
+  !print *, ' providing int2_grad1_u12_bimo_transp'
+
+  call wall_time(wall0)
+  !$OMP PARALLEL         &
+  !$OMP DEFAULT (NONE)   &
+  !$OMP PRIVATE (ipoint) & 
+  !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao_transp,int2_grad1_u12_bimo_transp)
+  !$OMP DO SCHEDULE (dynamic)
+   do ipoint = 1, n_points_final_grid
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,1,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,1,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,2,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,2,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+     call ao_to_mo_bi_ortho( int2_grad1_u12_ao_transp  (1,1,3,ipoint), size(int2_grad1_u12_ao_transp  , 1) &
+                           , int2_grad1_u12_bimo_transp(1,1,3,ipoint), size(int2_grad1_u12_bimo_transp, 1) )
+   enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  !print *, ' Wall time for providing int2_grad1_u12_bimo_transp',wall1 - wall0
 
 END_PROVIDER 
 
 ! ---
+
 BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3, mo_num, mo_num )]
  implicit none
  integer          :: i, j, ipoint
@@ -165,35 +184,22 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo_t, (n_points_final_grid,3
  enddo
 END_PROVIDER 
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_bimo, (3, mo_num, mo_num, n_points_final_grid)]
+! ---
 
-  BEGIN_DOC
-  !
-  ! int2_grad1_u12_bimo(:,k,i,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \chi_k(r2) \phi_i(r2) 
-  !
-  END_DOC
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_t, (n_points_final_grid, 3, ao_num, ao_num)]
 
   implicit none
-  integer :: ipoint
-  print*,'Wrong !!'
-  stop
- !$OMP PARALLEL         &
- !$OMP DEFAULT (NONE)   &
- !$OMP PRIVATE (ipoint) & 
- !$OMP SHARED (n_points_final_grid,int2_grad1_u12_ao,int2_grad1_u12_bimo)
- !$OMP DO SCHEDULE (dynamic)
+  integer :: i, j, ipoint
+
   do ipoint = 1, n_points_final_grid
-
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (1,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(1,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (2,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(2,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-    call ao_to_mo_bi_ortho( int2_grad1_u12_ao  (3,1,1,ipoint), size(int2_grad1_u12_ao  , 2) &
-                          , int2_grad1_u12_bimo(3,1,1,ipoint), size(int2_grad1_u12_bimo, 2) )
-
+    do i = 1, ao_num
+      do j = 1, ao_num
+        int2_grad1_u12_ao_t(ipoint,1,j,i) = int2_grad1_u12_ao(j,i,ipoint,1)
+        int2_grad1_u12_ao_t(ipoint,2,j,i) = int2_grad1_u12_ao(j,i,ipoint,2)
+        int2_grad1_u12_ao_t(ipoint,3,j,i) = int2_grad1_u12_ao(j,i,ipoint,3)
+      enddo                                  
+    enddo
   enddo
- !$OMP END DO
- !$OMP END PARALLEL
 
 END_PROVIDER 
 
diff --git a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
index c1c27f06..48fa84f7 100644
--- a/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
+++ b/src/bi_ort_ints/three_body_ints_bi_ort.irp.f
@@ -15,7 +15,7 @@ BEGIN_PROVIDER [ double precision, three_body_ints_bi_ort, (mo_num, mo_num, mo_n
  character*(128)  :: name_file 
 
   three_body_ints_bi_ort = 0.d0
-  print*,'Providing the three_body_ints_bi_ort ...'
+  print *, ' Providing the three_body_ints_bi_ort ...'
   call wall_time(wall0)
   name_file = 'six_index_tensor'
 
@@ -71,7 +71,7 @@ subroutine give_integrals_3_body_bi_ort(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -104,12 +104,11 @@ end subroutine give_integrals_3_body_bi_ort
 
 ! ---
 
-
 subroutine give_integrals_3_body_bi_ort_old(n, l, k, m, j, i, integral)
 
   BEGIN_DOC
   !
-  ! < n l k | -L | m j i > with a BI-ORTHONORMAL ORBITALS 
+  ! < n l k | -L | m j i > with a BI-ORTHONORMAL MOLECULAR ORBITALS 
   !
   END_DOC
 
@@ -170,3 +169,39 @@ end subroutine give_integrals_3_body_bi_ort_old
 
 ! ---
 
+subroutine give_integrals_3_body_bi_ort_ao(n, l, k, m, j, i, integral)
+
+  BEGIN_DOC
+  !
+  ! < n l k | -L | m j i > where n, l, k, m, j, i label ATOMIC ORBITALS (only AO-basis arrays are used)
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n, l, k, m, j, i
+  double precision, intent(out) :: integral
+  integer                       :: ipoint
+  double precision              :: weight
+
+  integral = 0.d0
+  do ipoint = 1, n_points_final_grid
+    weight = final_weight_at_r_vector(ipoint)                                                                          
+
+    integral += weight * aos_in_r_array_transp(ipoint,k) * aos_in_r_array_transp(ipoint,i) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,l,j)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,l,j) )
+    integral += weight * aos_in_r_array_transp(ipoint,l) * aos_in_r_array_transp(ipoint,j) & 
+              * ( int2_grad1_u12_ao_t(ipoint,1,n,m) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,n,m) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,n,m) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+    integral += weight * aos_in_r_array_transp(ipoint,n) * aos_in_r_array_transp(ipoint,m) &
+              * ( int2_grad1_u12_ao_t(ipoint,1,l,j) * int2_grad1_u12_ao_t(ipoint,1,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,2,l,j) * int2_grad1_u12_ao_t(ipoint,2,k,i)    &
+                + int2_grad1_u12_ao_t(ipoint,3,l,j) * int2_grad1_u12_ao_t(ipoint,3,k,i) )
+
+  enddo
+
+end subroutine give_integrals_3_body_bi_ort_ao
+
+! ---
diff --git a/src/bi_ortho_mos/bi_density.irp.f b/src/bi_ortho_mos/bi_density.irp.f
index 947be870..90fe9634 100644
--- a/src/bi_ortho_mos/bi_density.irp.f
+++ b/src/bi_ortho_mos/bi_density.irp.f
@@ -2,47 +2,66 @@
 ! ---
 
 BEGIN_PROVIDER [double precision, TCSCF_bi_ort_dm_ao_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_alpha(i,j) = <Chi_0| a^dagger_i,alpha a_j,alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the alpha density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
+
+  PROVIDE mo_l_coef mo_r_coef
+
   call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_alpha, size(TCSCF_bi_ort_dm_ao_alpha, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao_beta(i,j) = <Chi_0| a^dagger_i,beta a_j,beta |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the beta density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0               &
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
             , mo_l_coef, size(mo_l_coef, 1), mo_r_coef, size(mo_r_coef, 1) &
             , 0.d0, TCSCF_bi_ort_dm_ao_beta, size(TCSCF_bi_ort_dm_ao_beta, 1) )
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, TCSCF_bi_ort_dm_ao, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
+  !
+  ! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
+  END_DOC
+
   implicit none
- BEGIN_DOC
-! TCSCF_bi_ort_dm_ao(i,j) = <Chi_0| a^dagger_i,beta+alpha a_j,beta+alpha |Phi_0> where i,j are AO basis. 
-!
-! This is the equivalent of the total electronic density of the HF Slater determinant, but with a couple of bi-orthonormal Slater determinant |Chi_0> and |Phi_0>
- END_DOC
-  ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1) )
-  if( elec_alpha_num==elec_beta_num ) then
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_alpha, 1))
+
+  if(elec_alpha_num==elec_beta_num) then
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_alpha
   else
-    ASSERT ( size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
+    ASSERT(size(TCSCF_bi_ort_dm_ao, 1) == size(TCSCF_bi_ort_dm_ao_beta, 1))
     TCSCF_bi_ort_dm_ao = TCSCF_bi_ort_dm_ao_alpha + TCSCF_bi_ort_dm_ao_beta
   endif
+
 END_PROVIDER
 
 ! ---
diff --git a/src/bi_ortho_mos/mos_rl.irp.f b/src/bi_ortho_mos/mos_rl.irp.f
index 034a436e..d51999fc 100644
--- a/src/bi_ortho_mos/mos_rl.irp.f
+++ b/src/bi_ortho_mos/mos_rl.irp.f
@@ -37,6 +37,52 @@ end subroutine ao_to_mo_bi_ortho
 
 ! ---
 
+subroutine mo_to_ao_bi_ortho(A_mo, LDA_mo, A_ao, LDA_ao)
+
+  BEGIN_DOC
+  !
+  ! mo_l_coef.T x     A_ao   x mo_r_coef = A_mo
+  ! mo_l_coef.T x ao_overlap x mo_r_coef =  I
+  !
+  ! ==> A_ao = (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: LDA_ao, LDA_mo
+  double precision, intent(in)  :: A_mo(LDA_mo,mo_num)
+  double precision, intent(out) :: A_ao(LDA_ao,ao_num)
+  double precision, allocatable :: tmp_1(:,:), tmp_2(:,:)
+
+  ! ao_overlap x mo_r_coef
+  allocate( tmp_1(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_r_coef, size(mo_r_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo
+  allocate( tmp_2(ao_num,mo_num) )
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0 &
+            , tmp_1, size(tmp_1, 1), A_mo, LDA_mo    &
+            , 0.d0, tmp_2, size(tmp_2, 1) )
+  
+  ! ao_overlap x mo_l_coef
+  tmp_1 = 0.d0
+  call dgemm( 'N', 'N', ao_num, mo_num, ao_num, 1.d0                         &
+            , ao_overlap, size(ao_overlap, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, tmp_1, size(tmp_1, 1) )
+
+  ! (ao_overlap x mo_r_coef) x A_mo x (ao_overlap x mo_l_coef).T
+  call dgemm( 'N', 'T', ao_num, ao_num, mo_num, 1.d0       &
+            , tmp_2, size(tmp_2, 1), tmp_1, size(tmp_1, 1) &
+            , 0.d0, A_ao, LDA_ao )
+  
+  deallocate(tmp_1, tmp_2)
+
+end subroutine mo_to_ao_bi_ortho
+
+! ---
+
 BEGIN_PROVIDER [ double precision, mo_r_coef, (ao_num, mo_num) ]
 
   BEGIN_DOC
@@ -175,3 +221,4 @@ END_PROVIDER
 
 ! ---
 
+
diff --git a/src/dft_utils_in_r/ao_in_r.irp.f b/src/dft_utils_in_r/ao_in_r.irp.f
index 6fa6a4c7..72f820ec 100644
--- a/src/dft_utils_in_r/ao_in_r.irp.f
+++ b/src/dft_utils_in_r/ao_in_r.irp.f
@@ -40,6 +40,47 @@
  END_PROVIDER
 
 
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra, (ao_num,n_points_extra_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra(i,j)        = value of the ith ao on the jth grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ !$OMP PARALLEL DO &
+ !$OMP DEFAULT (NONE)  &
+ !$OMP PRIVATE (i,r,aos_array,j) & 
+ !$OMP SHARED(aos_in_r_array_extra,n_points_extra_final_grid,ao_num,final_grid_points_extra)
+ do i = 1, n_points_extra_final_grid
+  r(1) = final_grid_points_extra(1,i)
+  r(2) = final_grid_points_extra(2,i)
+  r(3) = final_grid_points_extra(3,i)
+  call give_all_aos_at_r(r,aos_array)
+  do j = 1, ao_num
+   aos_in_r_array_extra(j,i) = aos_array(j)
+  enddo
+ enddo
+ !$OMP END PARALLEL DO
+
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER[double precision, aos_in_r_array_extra_transp, (n_points_extra_final_grid,ao_num)]
+ implicit none
+ BEGIN_DOC
+ ! aos_in_r_array_extra_transp(i,j) = value of the jth ao on the ith grid point
+ END_DOC
+ integer :: i,j
+ double precision :: aos_array(ao_num), r(3)
+ do i = 1, n_points_extra_final_grid
+  do j = 1, ao_num
+   aos_in_r_array_extra_transp(i,j) = aos_in_r_array_extra(j,i) 
+  enddo
+ enddo
+
+ END_PROVIDER
+
+
 
  BEGIN_PROVIDER[double precision, aos_grad_in_r_array, (ao_num,n_points_final_grid,3)]
  implicit none
diff --git a/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
new file mode 100644
index 00000000..39ea0cdf
--- /dev/null
+++ b/src/dft_utils_in_r/ao_prod_mlti_pl.irp.f
@@ -0,0 +1,155 @@
+
+BEGIN_PROVIDER [ double precision, ao_abs_int_grid, (ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_abs_int_grid(i) = \int dr |phi_i(r) |
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_abs_int_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+    contrib = dabs(aos_in_r_array(i,ipoint)) * weight
+    ao_abs_int_grid(i) += contrib 
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_overlap_abs_grid, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_overlap_abs_grid(j,i) = \int dr |phi_i(r) phi_j(r)| 
+ END_DOC
+ integer :: i,j,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_overlap_abs_grid = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    ao_overlap_abs_grid(j,i) += contrib 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_center, (3, ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_center(1:3,j,i) = \int dr |phi_i(r) phi_j(r)| x/y/z / \int |phi_i(r) phi_j(r)|
+!
+! if \int |phi_i(r) phi_j(r)| < 1.d-10 then ao_prod_center = 10000.
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3)
+ ao_prod_center = 0.D0
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    do m = 1, 3
+     ao_prod_center(m,j,i) += contrib * r(m)
+    enddo
+   enddo
+  enddo
+ enddo
+ do i = 1, ao_num
+  do j = 1, ao_num
+   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-10)then
+    do m = 1, 3
+     ao_prod_center(m,j,i) *= 1.d0/ao_overlap_abs_grid(j,i)
+    enddo
+   else
+    do m = 1, 3
+     ao_prod_center(m,j,i) = 10000.d0
+    enddo
+   endif
+  enddo
+ enddo
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_abs_r, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! ao_prod_abs_r(j,i) = \int dr |phi_i(r) phi_j(r)| dsqrt((x - <|i|x|j|>)^2 + (y - <|i|y|j|>)^2 + (z - <|i|z|j|>)^2), NOT divided by \int |phi_i(r) phi_j(r)|
+! where <|i|x|j|>, <|i|y|j|>, <|i|z|j|> are ao_prod_center(1:3,j,i)
+ END_DOC
+ ao_prod_abs_r = 0.d0
+ integer :: i,j,m,ipoint
+ double precision :: contrib, weight,r(3),contrib_x2
+ do ipoint = 1,n_points_final_grid 
+  r(:) = final_grid_points(:,ipoint)
+  weight = final_weight_at_r_vector(ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    contrib = dabs(aos_in_r_array(j,ipoint) * aos_in_r_array(i,ipoint)) * weight
+    contrib_x2 = 0.d0
+    do m = 1, 3
+     contrib_x2 += (r(m) - ao_prod_center(m,j,i)) * (r(m) - ao_prod_center(m,j,i)) 
+    enddo
+    contrib_x2 = dsqrt(contrib_x2)
+    ao_prod_abs_r(j,i) += contrib * contrib_x2
+   enddo
+  enddo
+ enddo
+
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, ao_prod_sigma, (ao_num, ao_num)]
+ implicit none
+ BEGIN_DOC
+! Gaussian exponent reproducing the product |chi_i(r) chi_j(r)| 
+!
+! Therefore |chi_i(r) chi_j(r)|  \approx e^{-ao_prod_sigma(j,i) (r - ao_prod_center(1:3,j,i))**2}
+ END_DOC
+ integer :: i,j
+ double precision :: pi,alpha
+ pi = dacos(-1.d0)
+ do i = 1, ao_num
+  do j = 1, ao_num
+!   if(dabs(ao_overlap_abs_grid(j,i)).gt.1.d-5)then
+     alpha = 1.d0/pi * (2.d0*ao_overlap_abs_grid(j,i)/ao_prod_abs_r(j,i))**2
+     ao_prod_sigma(j,i) = alpha
+!   endif
+  enddo
+ enddo
+ END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, ao_prod_dist_grid, (ao_num, ao_num, n_points_final_grid)]
+ implicit none
+ BEGIN_DOC
+ ! ao_prod_dist_grid(j,i,ipoint) = distance between the center of |phi_i(r) phi_j(r)| and the grid point r(ipoint)
+ END_DOC
+ integer :: i,j,m,ipoint
+ double precision :: distance,r(3)
+ do ipoint = 1, n_points_final_grid
+  r(:) = final_grid_points(:,ipoint)
+  do i = 1, ao_num
+   do j = 1, ao_num
+    distance = 0.d0
+    do m = 1, 3
+     distance += (ao_prod_center(m,j,i) - r(m))*(ao_prod_center(m,j,i) - r(m))
+    enddo
+    distance = dsqrt(distance)
+    ao_prod_dist_grid(j,i,ipoint)  = distance
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER 
+
+
+!BEGIN_PROVIDER [ double precision, ao_abs_prod_j1b, (ao_num, ao_num)]
+! implicit none
+!
+!END_PROVIDER 
diff --git a/src/hartree_fock/fock_matrix_hf.irp.f b/src/hartree_fock/fock_matrix_hf.irp.f
index d7d8fa7d..cb698fbb 100644
--- a/src/hartree_fock/fock_matrix_hf.irp.f
+++ b/src/hartree_fock/fock_matrix_hf.irp.f
@@ -1,12 +1,27 @@
+! ---
 
  BEGIN_PROVIDER [ double precision, ao_two_e_integral_alpha, (ao_num, ao_num) ]
-&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta ,  (ao_num, ao_num) ]
- use map_module
- implicit none
+&BEGIN_PROVIDER [ double precision, ao_two_e_integral_beta , (ao_num, ao_num) ]
+
  BEGIN_DOC
- ! Alpha and Beta Fock matrices in AO basis set
+ !
+ ! 2-e part of alpha and beta Fock matrices (F^{a} & F^{b}) in AO basis set
+ ! 
+ ! F^{a} = h + G^{a}
+ ! F^{b} = h + G^{b}
+ ! 
+ ! where : 
+ !         G^{a} = J^{a} + J^{b} - K^{a} ==> G_{ij}^{a} = \sum_{k,l} [ P_{kl} (kl|ij) - P_{kl}^{a} (ki|lj) ]
+ !         G^{b} = J^{a} + J^{b} - K^{b} ==> G_{ij}^{b} = \sum_{k,l} [ P_{kl} (kl|ij) - P_{kl}^{b} (ki|lj) ]
+ ! 
+ ! and P_{kl} = P_{kl}^{a} + P_{kl}^{b} 
+ !
  END_DOC
 
+ use map_module
+
+ implicit none
+
  integer                        :: i,j,k,l,k1,r,s
  integer                        :: i0,j0,k0,l0
  integer*8                      :: p,q
@@ -153,6 +168,8 @@
 
 END_PROVIDER
 
+! ---
+
  BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]
 &BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta,  (ao_num, ao_num) ]
  implicit none
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
index 3226073d..8cbf9dd0 100644
--- a/src/hartree_fock/scf.irp.f
+++ b/src/hartree_fock/scf.irp.f
@@ -68,20 +68,29 @@ subroutine create_guess
   endif
 end
 
-subroutine run
+! ---
+
+subroutine run()
 
   BEGIN_DOC
-!   Run SCF calculation
+  ! Run SCF calculation
   END_DOC
 
   use bitmasks
   implicit none
 
-  integer                        :: i_it, i, j, k
-
   mo_label = 'Orthonormalized'
 
-  call Roothaan_Hall_SCF
+  PROVIDE scf_algorithm
+
+  if(scf_algorithm .eq. "DIIS") then
+    call Roothaan_Hall_SCF()
+  elseif(scf_algorithm .eq. "Simple") then
+    call Roothaan_Hall_SCF_Simple()
+  else
+    print *, scf_algorithm, ' not implemented yet'
+  endif
+
   call ezfio_set_hartree_fock_energy(SCF_energy)
 
 end
diff --git a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
index bb585f63..5e7ef7e9 100644
--- a/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
+++ b/src/non_h_ints_mu/debug_integ_jmu_modif.irp.f
@@ -17,7 +17,7 @@ program debug_integ_jmu_modif
 
   PROVIDE mu_erf j1b_pen
 
-  call test_v_ij_u_cst_mu_j1b()
+!  call test_v_ij_u_cst_mu_j1b()
 !  call test_v_ij_erf_rk_cst_mu_j1b()
 !  call test_x_v_ij_erf_rk_cst_mu_j1b()
 !  call test_int2_u2_j1b2()
@@ -31,6 +31,9 @@ program debug_integ_jmu_modif
 !  call test_u12_grad1_u12_j1b_grad1_j1b()
 !  !call test_gradu_squared_u_ij_mu()
 
+  !call test_vect_overlap_gauss_r12_ao()
+  call test_vect_overlap_gauss_r12_ao_with1s()
+
 end
 
 ! ---
@@ -303,7 +306,7 @@ subroutine test_int2_grad1_u12_ao()
 
         call num_int2_grad1_u12_ao(i, j, ipoint, integ)
 
-        i_exc  = int2_grad1_u12_ao(1,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,1) 
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -315,7 +318,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(2,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -327,7 +330,7 @@ subroutine test_int2_grad1_u12_ao()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = int2_grad1_u12_ao(3,i,j,ipoint) 
+        i_exc  = int2_grad1_u12_ao(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -379,7 +382,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
 
         call num_int2_u_grad1u_total_j1b2(i, j, ipoint, integ)
 
-        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(1,i,j,ipoint) 
+        i_exc  = x * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,1)
         i_num  = integ(1)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -391,7 +394,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(2,i,j,ipoint) 
+        i_exc  = y * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,2) 
         i_num  = integ(2)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -403,7 +406,7 @@ subroutine test_int2_u_grad1u_total_j1b2()
         acc_tot += acc_ij
         normalz += dabs(i_num)
 
-        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(3,i,j,ipoint) 
+        i_exc  = z * int2_u_grad1u_j1b2(i,j,ipoint) - int2_u_grad1u_x_j1b2(i,j,ipoint,3) 
         i_num  = integ(3)
         acc_ij = dabs(i_exc - i_num)
         if(acc_ij .gt. eps_ij) then
@@ -595,7 +598,183 @@ subroutine test_u12_grad1_u12_j1b_grad1_j1b()
   print*, ' normalz = ', normalz
 
   return
-end subroutine test_u12_grad1_u12_j1b_grad1_j1b,
+end subroutine test_u12_grad1_u12_j1b_grad1_j1b
 
 ! ---
 
+subroutine test_vect_overlap_gauss_r12_ao()
+  ! Cross-check: overlap_gauss_r12_ao_v (all grid points per call) against overlap_gauss_r12_ao (one point per call).
+  implicit none
+  ! No arguments. Prints acc_tot (summed absolute differences) and normalz (summed |vectorized values|).
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao
+
+  print *, ' test_vect_overlap_gauss_r12_ao ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  ! Only the first entry of expo_gauss_j_mu_x_2 is exercised by this test.
+  expo_fit = expo_gauss_j_mu_x_2(1)
+
+  ! --- vectorized routine: one call per AO pair fills int_fit_v for every grid point
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_v(final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- reference values: scalar routine evaluated grid point by grid point
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao(r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- element-wise comparison; stops at the first absolute difference larger than eps_ij
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! i_exc is the scalar-routine value, i_num the vectorized one (printed below as analyt / numeri)
+        i_exc  = I_ref(i,j,ipoint) 
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao
+
+! ---
+
+subroutine test_vect_overlap_gauss_r12_ao_with1s()
+  ! Cross-check: overlap_gauss_r12_ao_with1s_v (all grid points per call) against overlap_gauss_r12_ao_with1s (one point per call).
+  implicit none
+  ! No arguments. Prints acc_tot (summed absolute differences) and normalz (summed |vectorized values|).
+  integer                       :: i, j, ipoint
+  double precision              :: acc_ij, acc_tot, eps_ij, i_exc, i_num, normalz
+  double precision              :: expo_fit, r(3), beta, B_center(3)
+  double precision, allocatable :: I_vec(:,:,:), I_ref(:,:,:), int_fit_v(:)
+
+  double precision, external    :: overlap_gauss_r12_ao_with1s
+
+  print *, ' test_vect_overlap_gauss_r12_ao_with1s ...'
+
+  provide mu_erf final_grid_points_transp j1b_pen
+  ! Test case: first entry of expo_gauss_j_mu_x_2, with exponent/center taken from entry 2 of List_all_comb_b3_*.
+  expo_fit    = expo_gauss_j_mu_x_2(1)
+  beta        = List_all_comb_b3_expo  (2)
+  B_center(1) = List_all_comb_b3_cent(1,2)
+  B_center(2) = List_all_comb_b3_cent(2,2)
+  B_center(3) = List_all_comb_b3_cent(3,2)
+
+  ! --- vectorized routine: one call per AO pair fills int_fit_v for every grid point
+
+  allocate(int_fit_v(n_points_final_grid))
+  allocate(I_vec(ao_num,ao_num,n_points_final_grid))
+
+  I_vec = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, n_points_final_grid, expo_fit, i, j, int_fit_v, n_points_final_grid, n_points_final_grid)
+
+      do ipoint = 1, n_points_final_grid
+        I_vec(j,i,ipoint) = int_fit_v(ipoint)
+      enddo
+    enddo
+  enddo
+
+  ! --- reference values: scalar routine evaluated grid point by grid point
+
+  allocate(I_ref(ao_num,ao_num,n_points_final_grid))
+
+  do ipoint = 1, n_points_final_grid
+    r(1) = final_grid_points(1,ipoint)
+    r(2) = final_grid_points(2,ipoint)
+    r(3) = final_grid_points(3,ipoint)
+
+    do i = 1, ao_num
+      do j = 1, ao_num
+
+        I_ref(j,i,ipoint) = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
+      enddo
+    enddo
+  enddo
+
+  ! --- element-wise comparison; stops at the first absolute difference larger than eps_ij
+
+  eps_ij  = 1d-3
+  acc_tot = 0.d0
+  normalz = 0.d0
+
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do i = 1, ao_num
+        ! i_exc is the scalar-routine value, i_num the vectorized one (printed below as analyt / numeri)
+        i_exc  = I_ref(i,j,ipoint)
+        i_num  = I_vec(i,j,ipoint)
+        acc_ij = dabs(i_exc - i_num)
+        !acc_ij = dabs(i_exc - i_num) / dabs(i_exc)
+        if(acc_ij .gt. eps_ij) then
+          print *, ' problem in overlap_gauss_r12_ao_with1s_v on', i, j, ipoint
+          print *, ' analyt integ = ', i_exc
+          print *, ' numeri integ = ', i_num
+          print *, ' diff         = ', acc_ij
+          stop
+        endif
+
+        acc_tot += acc_ij
+        normalz += dabs(i_num)
+      enddo
+    enddo
+  enddo
+
+  print*, ' acc_tot = ', acc_tot
+  print*, ' normalz = ', normalz
+
+  return
+end subroutine test_vect_overlap_gauss_r12_ao_with1s
+
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..81a8fe71 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -70,9 +70,9 @@ BEGIN_PROVIDER [ double precision, gradu_squared_u_ij_mu, (ao_num, ao_num, n_poi
 
           gradu_squared_u_ij_mu(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2(i,j,ipoint)            &
                                             + tmp2 * int2_u2_j1b2             (i,j,ipoint)            &
-                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+                                            + tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                            + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                            + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
         enddo
       enddo
     enddo
@@ -104,11 +104,11 @@ END_PROVIDER
 
 ! ---
 
-!BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+!BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 !
 !  BEGIN_DOC
 !  !
-!  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+!  ! tc_grad_square_ao_loop(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
 !  !
 !  END_DOC
 !
@@ -142,8 +142,8 @@ END_PROVIDER
 !    do l = 1, ao_num
 !      do i = 1, ao_num
 !        do k = 1, ao_num
-!          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
-!          !write(11,*) tc_grad_square_ao(k,i,l,j)
+!          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+!          !write(11,*) tc_grad_square_ao_loop(k,i,l,j)
 !        enddo
 !      enddo
 !    enddo
@@ -155,19 +155,23 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_square_ao(k,i,l,j) = -1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_1 u(r1,r2)|^2 | ij>
+  ! tc_grad_square_ao_loop(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
   !
   END_DOC
 
   implicit none
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, ao_ik_r, ao_i_r
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:), bc_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_square_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
   allocate(bc_mat(ao_num,ao_num,ao_num,ao_num))
@@ -177,10 +181,12 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     weight1 = final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      !ao_i_r = weight1 * aos_in_r_array_transp(ipoint,i)
+      ao_i_r = weight1 * aos_in_r_array(i,ipoint)
 
       do k = 1, ao_num
-        ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        !ao_ik_r = ao_i_r * aos_in_r_array_transp(ipoint,k)
+        ao_ik_r = ao_i_r * aos_in_r_array(k,ipoint)
 
         do j = 1, ao_num
           do l = 1, ao_num
@@ -196,7 +202,7 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
     do l = 1, ao_num
       do i = 1, ao_num
         do k = 1, ao_num
-          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
+          tc_grad_square_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i) + bc_mat(k,i,l,j)
         enddo
       enddo
     enddo
@@ -205,6 +211,9 @@ BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao
   deallocate(ac_mat)
   deallocate(bc_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_loop = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
@@ -328,9 +337,9 @@ BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b, (ao_num, ao_num,
 
         tmp9 = int2_u_grad1u_j1b2(i,j,ipoint)
 
-        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(1,i,j,ipoint) &
-                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(2,i,j,ipoint) &
-                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(3,i,j,ipoint)
+        u12_grad1_u12_j1b_grad1_j1b(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2(i,j,ipoint,1) &
+                                                + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2(i,j,ipoint,2) &
+                                                + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2(i,j,ipoint,3)
       enddo
     enddo
   enddo
@@ -342,3 +351,86 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao ...'
+  call wall_time(time0)
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+  ! b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * AO_i(ipoint) * AO_k(ipoint)
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) &
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! tmp(l,j,ipoint) = u12sq_j1bsq + u12_grad1_u12_j1b_grad1_j1b + 0.5 * grad12_j12, all taken at (l,j,ipoint)
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) &
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq, u12_grad1_u12_j1b_grad1_j1b, grad12_j12)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b(l,j,ipoint) + 0.5d0 * grad12_j12(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! ac_mat(k,i,l,j) = \sum_ipoint tmp(k,i,ipoint) * b_mat(ipoint,l,j), done as a single
+  ! (ao_num^2 x n_points_final_grid) times (n_points_final_grid x ao_num^2) matrix product.
+  ! ac_mat is zeroed first because dgemm is called with beta = 1.d0.
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+  ! Final result: sum of ac_mat and ac_mat with the two index pairs (k,i) and (l,j) exchanged
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) &
+ !$OMP SHARED (ac_mat, tc_grad_square_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
diff --git a/src/non_h_ints_mu/grad_squared_manu.irp.f b/src/non_h_ints_mu/grad_squared_manu.irp.f
new file mode 100644
index 00000000..180c9588
--- /dev/null
+++ b/src/non_h_ints_mu/grad_squared_manu.irp.f
@@ -0,0 +1,221 @@
+
+BEGIN_PROVIDER [double precision, tc_grad_square_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_square_ao_test(k,i,l,j) = 1/2 <kl | |\grad_1 u(r1,r2)|^2 + |\grad_2 u(r1,r2)|^2 | ij>
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:), tmp(:,:,:)
+
+  print*, ' providing tc_grad_square_ao_test ...'
+  call wall_time(time0)
+
+  provide u12sq_j1bsq_test u12_grad1_u12_j1b_grad1_j1b_test grad12_j12_test
+
+  allocate(ac_mat(ao_num,ao_num,ao_num,ao_num), b_mat(n_points_final_grid,ao_num,ao_num), tmp(ao_num,ao_num,n_points_final_grid))
+  ! b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * AO_i(ipoint) * AO_k(ipoint)
+  b_mat = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (i, k, ipoint) &
+ !$OMP SHARED (aos_in_r_array_transp, b_mat, ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        b_mat(ipoint,k,i) = final_weight_at_r_vector(ipoint) * aos_in_r_array_transp(ipoint,i) * aos_in_r_array_transp(ipoint,k)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! tmp(l,j,ipoint) = u12sq_j1bsq_test + u12_grad1_u12_j1b_grad1_j1b_test + 0.5 * grad12_j12_test at (l,j,ipoint)
+  tmp = 0.d0
+ !$OMP PARALLEL               &
+ !$OMP DEFAULT (NONE)         &
+ !$OMP PRIVATE (j, l, ipoint) &
+ !$OMP SHARED (tmp, ao_num, n_points_final_grid, u12sq_j1bsq_test, u12_grad1_u12_j1b_grad1_j1b_test, grad12_j12_test)
+ !$OMP DO SCHEDULE (static)
+  do ipoint = 1, n_points_final_grid
+    do j = 1, ao_num
+      do l = 1, ao_num
+        tmp(l,j,ipoint) = u12sq_j1bsq_test(l,j,ipoint) + u12_grad1_u12_j1b_grad1_j1b_test(l,j,ipoint) + 0.5d0 * grad12_j12_test(l,j,ipoint)
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+  ! ac_mat(k,i,l,j) = \sum_ipoint tmp(k,i,ipoint) * b_mat(ipoint,l,j), done as one matrix product.
+  ! ac_mat is zeroed first because dgemm is called with beta = 1.d0.
+  ac_mat = 0.d0
+  call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0 &
+            , tmp(1,1,1), ao_num*ao_num, b_mat(1,1,1), n_points_final_grid      &
+            , 1.d0, ac_mat, ao_num*ao_num)
+  deallocate(tmp, b_mat)
+  ! Final result: sum of ac_mat and ac_mat with the two index pairs (k,i) and (l,j) exchanged
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) &
+ !$OMP SHARED (ac_mat, tc_grad_square_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_square_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_square_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12sq_j1bsq_test, (ao_num, ao_num, n_points_final_grid) ]
+  ! u12sq_j1bsq_test(i,j,ipoint) = -1/2 * |v_1b_grad(:,ipoint)|^2 * int2_u2_j1b2_test(i,j,ipoint)
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: tmp_x, tmp_y, tmp_z
+  double precision           :: tmp1
+  double precision           :: time0, time1
+
+  print*, ' providing u12sq_j1bsq_test ...'
+  call wall_time(time0)
+  ! tmp1 depends on the grid point only, so it is computed once per ipoint outside the AO loops
+  do ipoint = 1, n_points_final_grid
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    tmp1  = -0.5d0 * (tmp_x * tmp_x + tmp_y * tmp_y + tmp_z * tmp_z)
+    do j = 1, ao_num
+      do i = 1, ao_num
+        u12sq_j1bsq_test(i,j,ipoint) = tmp1 * int2_u2_j1b2_test(i,j,ipoint)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12sq_j1bsq_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, u12_grad1_u12_j1b_grad1_j1b_test, (ao_num, ao_num, n_points_final_grid) ]
+  ! u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = v_1b(ipoint) * \sum_{c=1,3} v_1b_grad(c,ipoint)
+  !     * [ int2_u_grad1u_x_j1b2_test(i,j,ipoint,c) - final_grid_points(c,ipoint) * int2_u_grad1u_j1b2_test(i,j,ipoint) ]
+  implicit none
+  integer                    :: ipoint, i, j
+  double precision           :: x, y, z
+  double precision           :: tmp_v, tmp_x, tmp_y, tmp_z
+  double precision           :: tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9
+  double precision           :: time0, time1
+
+  print*, ' providing u12_grad1_u12_j1b_grad1_j1b_test ...'
+
+  provide int2_u_grad1u_x_j1b2_test
+  call wall_time(time0)
+
+  do ipoint = 1, n_points_final_grid
+
+    x     = final_grid_points(1,ipoint)
+    y     = final_grid_points(2,ipoint)
+    z     = final_grid_points(3,ipoint)
+    tmp_v = v_1b       (ipoint)
+    tmp_x = v_1b_grad(1,ipoint)
+    tmp_y = v_1b_grad(2,ipoint)
+    tmp_z = v_1b_grad(3,ipoint)
+    ! tmp3..tmp5: v_1b * v_1b_grad(c), the prefactors of the int2_u_grad1u_x_j1b2_test components
+    tmp3 = tmp_v * tmp_x
+    tmp4 = tmp_v * tmp_y
+    tmp5 = tmp_v * tmp_z
+    ! tmp6..tmp8: the same prefactors times minus the grid coordinate; they multiply int2_u_grad1u_j1b2_test
+    tmp6 = -x * tmp3
+    tmp7 = -y * tmp4
+    tmp8 = -z * tmp5
+
+    do j = 1, ao_num
+      do i = 1, ao_num
+
+        tmp9 = int2_u_grad1u_j1b2_test(i,j,ipoint)
+
+        u12_grad1_u12_j1b_grad1_j1b_test(i,j,ipoint) = tmp6 * tmp9 + tmp3 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,1) &
+                                                     + tmp7 * tmp9 + tmp4 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,2) &
+                                                     + tmp8 * tmp9 + tmp5 * int2_u_grad1u_x_j1b2_test(i,j,ipoint,3)
+      enddo
+    enddo
+  enddo
+
+  call wall_time(time1)
+  print*, ' Wall time for u12_grad1_u12_j1b_grad1_j1b_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, grad12_j12_test, (ao_num, ao_num, n_points_final_grid) ]
+  ! grad12_j12_test(i,j,ipoint): grid-resolved integral, built in one of two ways depending on j1b_type
+  implicit none
+  integer                    :: ipoint, i, j, igauss
+  double precision           :: r(3), delta, coef
+  double precision           :: tmp1
+  double precision           :: time0, time1
+  double precision, external :: overlap_gauss_r12_ao
+  provide int2_grad1u2_grad2u2_j1b2_test
+  print*, ' providing grad12_j12_test ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+
+  if(j1b_type .eq. 3) then
+    ! j1b_type = 3: v_1b(ipoint)^2 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+    do ipoint = 1, n_points_final_grid
+      tmp1 = v_1b(ipoint)
+      tmp1 = tmp1 * tmp1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          grad12_j12_test(i,j,ipoint) = tmp1 * int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint)
+        enddo
+      enddo
+    enddo
+
+  else
+    ! otherwise: -1/4 * \sum_igauss coef_gauss_1_erf_x_2(igauss) * overlap_gauss_r12_ao(r, expo_gauss_1_erf_x_2(igauss), i, j)
+    grad12_j12_test = 0.d0
+    do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do j = 1, ao_num
+        do i = 1, ao_num
+          do igauss = 1, n_max_fit_slat
+            delta = expo_gauss_1_erf_x_2(igauss)
+            coef  = coef_gauss_1_erf_x_2(igauss)
+            grad12_j12_test(i,j,ipoint) += -0.25d0 * coef * overlap_gauss_r12_ao(r, delta, i, j)
+          enddo
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for grad12_j12_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/j12_nucl_utils.irp.f b/src/non_h_ints_mu/j12_nucl_utils.irp.f
index f3b68f43..a515e0b8 100644
--- a/src/non_h_ints_mu/j12_nucl_utils.irp.f
+++ b/src/non_h_ints_mu/j12_nucl_utils.irp.f
@@ -237,6 +237,23 @@ end function j12_mu
 
 ! ---
 
+double precision function j12_mu_r12(r12)
+  ! Returns 0.5 * r12 * (1 - erf(mu_erf*r12)) - inv_sq_pi_2 * exp(-(mu_erf*r12)^2) / mu_erf for the scalar r12.
+  include 'constants.include.F'
+  ! NOTE(review): inv_sq_pi_2 presumably comes from constants.include.F (looks like 1/(2 sqrt(pi))) -- confirm.
+  implicit none
+  double precision, intent(in) :: r12
+  double precision             :: mu_r12
+  ! The expression divides by mu_erf, so it is only defined for a non-zero mu_erf.
+  mu_r12 = mu_erf * r12
+
+  j12_mu_r12 = 0.5d0 * r12 * (1.d0 - derf(mu_r12)) - inv_sq_pi_2 * dexp(-mu_r12*mu_r12) / mu_erf
+
+  return
+end function j12_mu_r12
+
+! ---
+
 double precision function j12_mu_gauss(r1, r2)
 
   implicit none
diff --git a/src/non_h_ints_mu/new_grad_tc.irp.f b/src/non_h_ints_mu/new_grad_tc.irp.f
index d34e629c..9aef436f 100644
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@@ -1,22 +1,21 @@
-  
 ! ---
 
-BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (ao_num, ao_num, n_points_final_grid, 3)]
 
   BEGIN_DOC
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2) 
   !
   ! where r1 = r(ipoint)
   !
   ! if J(r1,r2) = u12:
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  ! int2_grad1_u12_ao(i,j,ipoint,:) = 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
   !                                 = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
   !
   ! if J(r1,r2) = u12 x v1 x v2
   !
-  ! int2_grad1_u12_ao(:,i,j,ipoint) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  ! int2_grad1_u12_ao(i,j,ipoint,:) =      v1    x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
   !                                 - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ] 
   !                                 =    0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
   !                                 -    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
@@ -25,6 +24,95 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
   !
   END_DOC
 
+  implicit none
+  integer          :: ipoint, i, j
+  double precision :: time0, time1
+  double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
+
+  print*, ' providing int2_grad1_u12_ao ...'
+  call wall_time(time0)
+
+  PROVIDE j1b_type
+  
+  if(j1b_type .eq. 3) then
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      tmp0  = 0.5d0 * v_1b(ipoint)
+      tmp_x =  v_1b_grad(1,ipoint)
+      tmp_y =  v_1b_grad(2,ipoint)
+      tmp_z =  v_1b_grad(3,ipoint)
+  
+      do j = 1, ao_num
+        do i = 1, ao_num
+
+          tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
+          tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    do ipoint = 1, n_points_final_grid
+      x = final_grid_points(1,ipoint)
+      y = final_grid_points(2,ipoint)
+      z = final_grid_points(3,ipoint)
+
+      do j = 1, ao_num
+        do i = 1, ao_num
+          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
+
+          int2_grad1_u12_ao(i,j,ipoint,1) = tmp1 * x - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,1)
+          int2_grad1_u12_ao(i,j,ipoint,2) = tmp1 * y - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,2)
+          int2_grad1_u12_ao(i,j,ipoint,3) = tmp1 * z - x_v_ij_erf_rk_cst_mu_transp_bis(ipoint,i,j,3)
+        enddo
+      enddo
+    enddo
+
+    int2_grad1_u12_ao *= 0.5d0
+
+  endif
+
+  call wall_time(time1)
+  print*, ' Wall time for int2_grad1_u12_ao = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, int1_grad2_u12_ao, (3, ao_num, ao_num, n_points_final_grid)]
+
+  BEGIN_DOC
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = \int dr1 [-1 * \grad_r2 J(r1,r2)] \phi_i(r1) \phi_j(r1) 
+  !
+  ! where r1 = r(ipoint)
+  !
+  ! if J(r1,r2) = u12:
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) = +0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r1) \phi_j(r1)
+  !                                 = -0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu(i,j,ipoint,:) ]
+  !                                 = -int2_grad1_u12_ao(i,j,ipoint,:)
+  !
+  ! if J(r1,r2) = u12 x v1 x v2
+  !
+  ! int1_grad2_u12_ao(:,i,j,ipoint) =      v2    x [ 0.5 x \int dr1 [-(r1 - r2) (erf(mu * r12)-1)r_12] v1 \phi_i(r1) \phi_j(r1) ]
+  !                                 - \grad_2 v2 x [       \int dr1                   u12              v1 \phi_i(r1) \phi_j(r1) ] 
+  !                                 =   -0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint) * r(:) 
+  !                                 +    0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,:) 
+  !                                 - v_1b_grad[:,ipoint] * v_ij_u_cst_mu_j1b(i,j,ipoint)
+  !
+  !
+  END_DOC
+
   implicit none
   integer          :: ipoint, i, j
   double precision :: x, y, z, tmp_x, tmp_y, tmp_z, tmp0, tmp1, tmp2
@@ -49,32 +137,16 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao, (3, ao_num, ao_num, n_poin
           tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
           tmp2 = v_ij_u_cst_mu_j1b(i,j,ipoint)
 
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint) - tmp2 * tmp_x
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint) - tmp2 * tmp_y
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - tmp0 * x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint) - tmp2 * tmp_z
+          int1_grad2_u12_ao(1,i,j,ipoint) = -tmp1 * x + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,1) - tmp2 * tmp_x
+          int1_grad2_u12_ao(2,i,j,ipoint) = -tmp1 * y + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,2) - tmp2 * tmp_y
+          int1_grad2_u12_ao(3,i,j,ipoint) = -tmp1 * z + tmp0 * x_v_ij_erf_rk_cst_mu_j1b(i,j,ipoint,3) - tmp2 * tmp_z
         enddo
       enddo
     enddo
 
   else
 
-    do ipoint = 1, n_points_final_grid
-      x = final_grid_points(1,ipoint)
-      y = final_grid_points(2,ipoint)
-      z = final_grid_points(3,ipoint)
-
-      do j = 1, ao_num
-        do i = 1, ao_num
-          tmp1 = v_ij_erf_rk_cst_mu(i,j,ipoint)
-
-          int2_grad1_u12_ao(1,i,j,ipoint) = tmp1 * x - x_v_ij_erf_rk_cst_mu_tmp(1,i,j,ipoint)
-          int2_grad1_u12_ao(2,i,j,ipoint) = tmp1 * y - x_v_ij_erf_rk_cst_mu_tmp(2,i,j,ipoint)
-          int2_grad1_u12_ao(3,i,j,ipoint) = tmp1 * z - x_v_ij_erf_rk_cst_mu_tmp(3,i,j,ipoint)
-        enddo
-      enddo
-    enddo
-
-    int2_grad1_u12_ao *= 0.5d0
+    int1_grad2_u12_ao = -1.d0 * int2_grad1_u12_ao
 
   endif
 
@@ -82,11 +154,11 @@ END_PROVIDER
 
 ! ---
 
-BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_loop, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
   !
-  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) | ij >
+  ! tc_grad_and_lapl_ao_loop(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
   !
   ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
   !
@@ -98,33 +170,48 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
   integer                       :: ipoint, i, j, k, l
   double precision              :: weight1, contrib_x, contrib_y, contrib_z, tmp_x, tmp_y, tmp_z
   double precision              :: ao_k_r, ao_i_r, ao_i_dx, ao_i_dy, ao_i_dz
+  double precision              :: ao_j_r, ao_l_r, ao_l_dx, ao_l_dy, ao_l_dz
+  double precision              :: time0, time1
   double precision, allocatable :: ac_mat(:,:,:,:)
 
+  print*, ' providing tc_grad_and_lapl_ao_loop ...'
+  call wall_time(time0)
+
   allocate(ac_mat(ao_num,ao_num,ao_num,ao_num))
   ac_mat = 0.d0
 
+  ! ---
+
   do ipoint = 1, n_points_final_grid
     weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
 
     do i = 1, ao_num
-      ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
-      ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
-      ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
-      ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      !ao_i_r  = weight1 * aos_in_r_array_transp         (ipoint,i)
+      !ao_i_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,1)
+      !ao_i_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,2)
+      !ao_i_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,i,3)
+      ao_i_r  = weight1 * aos_in_r_array     (i,ipoint)
+      ao_i_dx = weight1 * aos_grad_in_r_array(i,ipoint,1)
+      ao_i_dy = weight1 * aos_grad_in_r_array(i,ipoint,2)
+      ao_i_dz = weight1 * aos_grad_in_r_array(i,ipoint,3)
 
       do k = 1, ao_num
-        ao_k_r = aos_in_r_array_transp(ipoint,k)
+        !ao_k_r = aos_in_r_array_transp(ipoint,k)
+        ao_k_r = aos_in_r_array(k,ipoint)
 
-        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
-        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
-        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        !tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1) 
+        !tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2) 
+        !tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3) 
+        tmp_x = ao_k_r * ao_i_dx - ao_i_r * aos_grad_in_r_array(k,ipoint,1) 
+        tmp_y = ao_k_r * ao_i_dy - ao_i_r * aos_grad_in_r_array(k,ipoint,2) 
+        tmp_z = ao_k_r * ao_i_dz - ao_i_r * aos_grad_in_r_array(k,ipoint,3) 
 
         do j = 1, ao_num
           do l = 1, ao_num
 
-            contrib_x = int2_grad1_u12_ao(1,l,j,ipoint) * tmp_x 
-            contrib_y = int2_grad1_u12_ao(2,l,j,ipoint) * tmp_y 
-            contrib_z = int2_grad1_u12_ao(3,l,j,ipoint) * tmp_z 
+            contrib_x = int2_grad1_u12_ao(l,j,ipoint,1) * tmp_x 
+            contrib_y = int2_grad1_u12_ao(l,j,ipoint,2) * tmp_y 
+            contrib_z = int2_grad1_u12_ao(l,j,ipoint,3) * tmp_z 
 
             ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
           enddo
@@ -132,7 +219,122 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+
+  ! ---
+
+  !do ipoint = 1, n_points_final_grid
+  !  weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+
+  !  do l = 1, ao_num
+  !    ao_l_r  = weight1 * aos_in_r_array_transp         (ipoint,l)
+  !    ao_l_dx = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,1)
+  !    ao_l_dy = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,2)
+  !    ao_l_dz = weight1 * aos_grad_in_r_array_transp_bis(ipoint,l,3)
+
+  !    do j = 1, ao_num
+  !      ao_j_r = aos_in_r_array_transp(ipoint,j)
+
+  !      tmp_x = ao_j_r * ao_l_dx - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,1) 
+  !      tmp_y = ao_j_r * ao_l_dy - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,2) 
+  !      tmp_z = ao_j_r * ao_l_dz - ao_l_r * aos_grad_in_r_array_transp_bis(ipoint,j,3) 
+
+  !      do i = 1, ao_num
+  !        do k = 1, ao_num
+
+  !          contrib_x = int2_grad1_u12_ao(k,i,ipoint,1) * tmp_x 
+  !          contrib_y = int2_grad1_u12_ao(k,i,ipoint,2) * tmp_y 
+  !          contrib_z = int2_grad1_u12_ao(k,i,ipoint,3) * tmp_z 
+
+  !          ac_mat(k,i,l,j) += contrib_x + contrib_y + contrib_z
+  !        enddo
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  ! ---
  
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+          !tc_grad_and_lapl_ao_loop(k,i,l,j) = ac_mat(k,i,l,j)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_loop = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. 
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao ...'
+  call wall_time(time0)
+
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  b_mat = 0.d0
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1)) 
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2)) 
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3)) 
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0              &
+              , int2_grad1_u12_ao(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num) 
+
+  enddo
+  deallocate(b_mat)
+
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao, ao_num)
+ !$OMP DO SCHEDULE (static)
   do j = 1, ao_num
     do l = 1, ao_num
       do i = 1, ao_num
@@ -142,10 +344,16 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
   deallocate(ac_mat)
 
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao = ', time1 - time0
+
 END_PROVIDER 
 
 ! ---
 
+
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
new file mode 100644
index 00000000..4d85e061
--- /dev/null
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -0,0 +1,174 @@
+
+BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (ao_num, ao_num, n_points_final_grid, 3)]
+
+  BEGIN_DOC
+  !
+  ! int2_grad1_u12_ao_test(i,j,ipoint,:) = \int dr2 [-1 * \grad_r1 J(r1,r2)] \phi_i(r2) \phi_j(r2)
+  !
+  ! with r1 = r(ipoint) = final_grid_points(:,ipoint).
+  !
+  ! j1b_type different from 3, J(r1,r2) = u12:
+  !
+  !   0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] \phi_i(r2) \phi_j(r2)
+  !   = 0.5 * [ v_ij_erf_rk_cst_mu(i,j,ipoint) * r(:) - x_v_ij_erf_rk_cst_mu_tmp(i,j,ipoint,:) ]
+  !
+  ! j1b_type equal to 3, J(r1,r2) = u12 x v1 x v2:
+  !
+  !        v1      x [ 0.5 x \int dr2 [(r1 - r2) (erf(mu * r12)-1)r_12] v2 \phi_i(r2) \phi_j(r2) ]
+  !   - \grad_1 v1 x [       \int dr2                  u12              v2 \phi_i(r2) \phi_j(r2) ]
+  !   =   0.5 v_1b(ipoint) * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint) * r(:)
+  !     - 0.5 v_1b(ipoint) * x_v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint,:)
+  !     - v_1b_grad(:,ipoint) * v_ij_u_cst_mu_j1b_test(i,j,ipoint)
+  !
+  END_DOC
+
+  implicit none
+  integer          :: ipoint, ii, jj
+  double precision :: t_beg, t_end
+  double precision :: x_pt, y_pt, z_pt
+  double precision :: half_v1b, dv1b_x, dv1b_y, dv1b_z
+  double precision :: erf_ij, ucst_ij
+
+  print*, ' providing int2_grad1_u12_ao_test ...'
+  call wall_time(t_beg)
+
+  PROVIDE j1b_type
+
+  if(j1b_type == 3) then
+
+    ! u12 multiplied by the one-body factors: needs v_1b and its gradient at each grid point
+    do ipoint = 1, n_points_final_grid
+      x_pt = final_grid_points(1,ipoint)
+      y_pt = final_grid_points(2,ipoint)
+      z_pt = final_grid_points(3,ipoint)
+
+      half_v1b = 0.5d0 * v_1b(ipoint)
+      dv1b_x   = v_1b_grad(1,ipoint)
+      dv1b_y   = v_1b_grad(2,ipoint)
+      dv1b_z   = v_1b_grad(3,ipoint)
+
+      do jj = 1, ao_num
+        do ii = 1, ao_num
+          erf_ij  = half_v1b * v_ij_erf_rk_cst_mu_j1b_test(ii,jj,ipoint)
+          ucst_ij = v_ij_u_cst_mu_j1b_test(ii,jj,ipoint)
+
+          int2_grad1_u12_ao_test(ii,jj,ipoint,1) = erf_ij * x_pt - half_v1b * x_v_ij_erf_rk_cst_mu_j1b_test(ii,jj,ipoint,1) - ucst_ij * dv1b_x
+          int2_grad1_u12_ao_test(ii,jj,ipoint,2) = erf_ij * y_pt - half_v1b * x_v_ij_erf_rk_cst_mu_j1b_test(ii,jj,ipoint,2) - ucst_ij * dv1b_y
+          int2_grad1_u12_ao_test(ii,jj,ipoint,3) = erf_ij * z_pt - half_v1b * x_v_ij_erf_rk_cst_mu_j1b_test(ii,jj,ipoint,3) - ucst_ij * dv1b_z
+        enddo
+      enddo
+    enddo
+
+  else
+
+    ! bare u12: the overall factor 0.5 is applied to each element as it is stored
+    do ipoint = 1, n_points_final_grid
+      x_pt = final_grid_points(1,ipoint)
+      y_pt = final_grid_points(2,ipoint)
+      z_pt = final_grid_points(3,ipoint)
+
+      do jj = 1, ao_num
+        do ii = 1, ao_num
+          erf_ij = v_ij_erf_rk_cst_mu(ii,jj,ipoint)
+
+          int2_grad1_u12_ao_test(ii,jj,ipoint,1) = 0.5d0 * (erf_ij * x_pt - x_v_ij_erf_rk_cst_mu_tmp(ii,jj,ipoint,1))
+          int2_grad1_u12_ao_test(ii,jj,ipoint,2) = 0.5d0 * (erf_ij * y_pt - x_v_ij_erf_rk_cst_mu_tmp(ii,jj,ipoint,2))
+          int2_grad1_u12_ao_test(ii,jj,ipoint,3) = 0.5d0 * (erf_ij * z_pt - x_v_ij_erf_rk_cst_mu_tmp(ii,jj,ipoint,3))
+        enddo
+      enddo
+    enddo
+
+  endif
+
+  call wall_time(t_end)
+  print*, ' Wall time for int2_grad1_u12_ao_test = ', t_end - t_beg
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! tc_grad_and_lapl_ao_test(k,i,l,j) = < k l | -1/2 \Delta_1 u(r1,r2) - \grad_1 u(r1,r2) . \grad_1 | ij >
+  !
+  ! = 1/2 \int dr1 (phi_k(r1) \grad_r1 phi_i(r1) - phi_i(r1) \grad_r1 phi_k(r1)) . \int dr2 \grad_r1 u(r1,r2) \phi_l(r2) \phi_j(r2) 
+  !
+  ! This is obtained by integration by parts. The r2 integral is read from int2_grad1_u12_ao_test.
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: ipoint, i, j, k, l, m
+  double precision              :: weight1, ao_k_r, ao_i_r
+  double precision              :: time0, time1
+  double precision, allocatable :: ac_mat(:,:,:,:), b_mat(:,:,:,:)
+
+  print*, ' providing tc_grad_and_lapl_ao_test ...'
+  call wall_time(time0)
+
+  provide int2_grad1_u12_ao_test 
+ 
+  allocate(b_mat(n_points_final_grid,ao_num,ao_num,3), ac_mat(ao_num,ao_num,ao_num,ao_num))
+
+  ! b_mat(ipoint,k,i,m) = 0.5 * final_weight_at_r_vector(ipoint) * (phi_k d_m phi_i - phi_i d_m phi_k) at grid point ipoint.
+  ! The loop nest below assigns every element of b_mat, so no zero-fill is done beforehand.
+ !$OMP PARALLEL                                                              &
+ !$OMP DEFAULT (NONE)                                                        &
+ !$OMP PRIVATE (i, k, ipoint, weight1, ao_i_r, ao_k_r)                       & 
+ !$OMP SHARED (aos_in_r_array_transp, aos_grad_in_r_array_transp_bis, b_mat, & 
+ !$OMP         ao_num, n_points_final_grid, final_weight_at_r_vector)
+ !$OMP DO SCHEDULE (static)
+  do i = 1, ao_num
+    do k = 1, ao_num
+      do ipoint = 1, n_points_final_grid
+        weight1 = 0.5d0 * final_weight_at_r_vector(ipoint)
+        ao_i_r  = aos_in_r_array_transp(ipoint,i)
+        ao_k_r  = aos_in_r_array_transp(ipoint,k)
+
+        b_mat(ipoint,k,i,1) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,1) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,1))
+        b_mat(ipoint,k,i,2) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,2) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,2))
+        b_mat(ipoint,k,i,3) = weight1 * (ao_k_r * aos_grad_in_r_array_transp_bis(ipoint,i,3) - ao_i_r * aos_grad_in_r_array_transp_bis(ipoint,k,3))
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  ! ac_mat(a,b,c,d) = \sum_m \sum_ipoint int2_grad1_u12_ao_test(a,b,ipoint,m) * b_mat(ipoint,c,d,m); beta = 1 accumulates over m
+  ac_mat = 0.d0
+  do m = 1, 3
+    call dgemm( "N", "N", ao_num*ao_num, ao_num*ao_num, n_points_final_grid, 1.d0                   &
+              , int2_grad1_u12_ao_test(1,1,1,m), ao_num*ao_num, b_mat(1,1,1,m), n_points_final_grid &
+              , 1.d0, ac_mat, ao_num*ao_num)
+  enddo
+  deallocate(b_mat)
+
+  ! final integral: ac_mat plus the same array with the index pairs (k,i) and (l,j) swapped
+ !$OMP PARALLEL             &
+ !$OMP DEFAULT (NONE)       &
+ !$OMP PRIVATE (i, j, k, l) & 
+ !$OMP SHARED (ac_mat, tc_grad_and_lapl_ao_test, ao_num)
+ !$OMP DO SCHEDULE (static)
+  do j = 1, ao_num
+    do l = 1, ao_num
+      do i = 1, ao_num
+        do k = 1, ao_num
+          tc_grad_and_lapl_ao_test(k,i,l,j) = ac_mat(k,i,l,j) + ac_mat(l,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  deallocate(ac_mat)
+
+  call wall_time(time1)
+  print*, ' Wall time for tc_grad_and_lapl_ao_test = ', time1 - time0
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/non_h_ints_mu/total_tc_int.irp.f b/src/non_h_ints_mu/total_tc_int.irp.f
index 979296d1..81747553 100644
--- a/src/non_h_ints_mu/total_tc_int.irp.f
+++ b/src/non_h_ints_mu/total_tc_int.irp.f
@@ -7,17 +7,22 @@ BEGIN_PROVIDER [double precision, ao_tc_int_chemist, (ao_num, ao_num, ao_num, ao
   integer          :: i, j, k, l
   double precision :: wall1, wall0
 
+  print *, ' providing ao_tc_int_chemist ...'
   call wall_time(wall0)
-
-  do j = 1, ao_num
-    do l = 1, ao_num
-      do i = 1, ao_num
-        do k = 1, ao_num
-          ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
-        enddo
-      enddo
-    enddo
-  enddo
+  
+  if(test_cycle_tc)then
+   ao_tc_int_chemist = ao_tc_int_chemist_test
+  else
+   do j = 1, ao_num
+     do l = 1, ao_num
+       do i = 1, ao_num
+         do k = 1, ao_num
+           ao_tc_int_chemist(k,i,l,j) = tc_grad_square_ao(k,i,l,j) + tc_grad_and_lapl_ao(k,i,l,j) + ao_two_e_coul(k,i,l,j)
+         enddo
+       enddo
+     enddo
+   enddo
+  endif
 
   call wall_time(wall1)
   print *, ' wall time for ao_tc_int_chemist ', wall1 - wall0
@@ -26,6 +31,32 @@ END_PROVIDER
 
 ! ---
 
+BEGIN_PROVIDER [double precision, ao_tc_int_chemist_test, (ao_num, ao_num, ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! Element-wise sum tc_grad_square_ao_test + tc_grad_and_lapl_ao_test + ao_two_e_coul, all indexed (k,i,l,j).
+  END_DOC
+
+  implicit none
+  integer          :: jj, ll
+  double precision :: t_beg, t_end
+
+  print *, ' providing ao_tc_int_chemist_test ...'
+  call wall_time(t_beg)
+  do jj = 1, ao_num
+    do ll = 1, ao_num
+      ao_tc_int_chemist_test(:,:,ll,jj) = tc_grad_square_ao_test  (:,:,ll,jj) &
+                                        + tc_grad_and_lapl_ao_test(:,:,ll,jj) &
+                                        + ao_two_e_coul           (:,:,ll,jj)
+    enddo
+  enddo
+  call wall_time(t_end)
+  print *, ' wall time for ao_tc_int_chemist_test ', t_end - t_beg
+
+END_PROVIDER 
+
+! ---
+
 BEGIN_PROVIDER [double precision, ao_two_e_coul, (ao_num, ao_num, ao_num, ao_num) ]
 
   BEGIN_DOC
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
index df1eb71d..78fddf54 100644
--- a/src/non_hermit_dav/biorthog.irp.f
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -283,16 +283,16 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   ! -------------------------------------------------------------------------------------
   !
 
-  print *, ' '
-  print *, ' Computing the left/right eigenvectors ...'
-  print *, ' '
+  !print *, ' '
+  !print *, ' Computing the left/right eigenvectors ...'
+  !print *, ' '
 
-  allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) 
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n)) 
   
-  print *, ' fock matrix'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') A(i,:)
-  enddo
+  !print *, ' fock matrix'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') A(i,:)
+  !enddo
 
   !thr_cut = 1.d-15
   !call cancel_small_elmts(A, n, thr_cut)
@@ -301,11 +301,11 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
   !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
 
-  print *, ' '
-  print *, ' eigenvalues'
-  do i = 1, n
-    write(*, '(1000(F16.10,X))') WR(i), WI(i)
-  enddo
+  !print *, ' '
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  !enddo
   !print *, ' right eigenvect bef' 
   !do i = 1, n
   !  write(*, '(1000(F16.10,X))') VR(:,i)
@@ -328,9 +328,10 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
   !                  track & sort the real eigenvalues 
 
   n_good = 0
-  thr    = 1.d-3
+  !thr    = 100d0
+  thr    = Im_thresh_tcscf
   do i = 1, n
-    print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    !print*, 'Re(i) + Im(i)', WR(i), WI(i)
     if(dabs(WI(i)) .lt. thr) then
       n_good += 1
     else
@@ -404,23 +405,24 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .lt. thr_d) ) then
 
-    print *, ' lapack vectors are normalized and bi-orthogonalized'
+    !print *, ' lapack vectors are normalized and bi-orthogonalized'
     deallocate(S)
     return
 
-  elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
+  ! accu_nd is modified after adding the normalization
+  !elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv))/dble(n_real_eigv) .gt. thr_d) ) then
 
-    print *, ' lapack vectors are not normalized but bi-orthogonalized'
-    call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
+  !  print *, ' lapack vectors are not normalized but bi-orthogonalized'
+  !  call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, thr_d, thr_nd, .true.)
 
-    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+  !  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
 
-    deallocate(S)
-    return
+  !  deallocate(S)
+  !  return
 
   else
 
-    print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+    !print *, ' lapack vectors are not normalized neither bi-orthogonalized'
 
     ! ---
 
@@ -442,8 +444,8 @@ subroutine non_hrmt_bieig(n, A, thr_d, thr_nd, leigvec, reigvec, n_real_eigv, ei
     endif
     call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, thr_d, thr_nd, .true.)
 
-    !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
-    !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec)
+    !call impose_biorthog_qr(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, thr_d, thr_nd, leigvec, reigvec)
 
     ! ---
 
@@ -609,7 +611,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if(accu_nd .lt. 1d-8) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     print *, ' L & T bi-orthogonality: ok'
@@ -621,7 +623,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
     print *, ' L & T bi-orthogonality: not imposed yet'
     print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
+    call impose_biorthog_qr(n, n_real_eigv, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
@@ -631,7 +633,7 @@ subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigva
 
   return
 
-end 
+end subroutine non_hrmt_bieig_random_diag
 
 ! ---
 
@@ -959,7 +961,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
   enddo
   accu_nd = dsqrt(accu_nd)
 
-  if( accu_nd .lt. 1d-8 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     ! L x R is already bi-orthogonal
 
     !print *, ' L & T bi-orthogonality: ok'
@@ -971,7 +973,7 @@ subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
     !print *, ' L & T bi-orthogonality: not imposed yet'
     !print *, ' accu_nd = ', accu_nd
-    call impose_biorthog_qr(n, n, leigvec, reigvec)
+    call impose_biorthog_qr(n, n, thresh_biorthog_diag, thresh_biorthog_nondiag, leigvec, reigvec)
     deallocate( S )
   
   endif
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
index 53c62ce8..0d652af4 100644
--- a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -930,7 +930,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -973,7 +973,7 @@ subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, s
 
     tmp_abs = tmp_abs + tmp
     V_nrm   = V_nrm   + U_nrm 
-    write(*,'(I4,X,(100(F25.16,X)))')j,eigval(j), tmp, U_nrm
+    !write(*,'(I4,X,(100(F25.16,X)))') j,eigval(j), tmp, U_nrm
 
   enddo
 
@@ -1082,7 +1082,7 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize weighted vectors'
+  !print *, ' apply SVD to orthogonalize & normalize weighted vectors'
 
   ! ---
 
@@ -1097,10 +1097,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1160,10 +1160,10 @@ subroutine impose_weighted_orthog_svd(n, m, W, C)
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1185,7 +1185,7 @@ subroutine impose_orthog_svd(n, m, C)
   double precision, allocatable   :: S(:,:), tmp(:,:)
   double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
 
-  print *, ' apply SVD to orthogonalize & normalize vectors'
+  !print *, ' apply SVD to orthogonalize & normalize vectors'
 
   ! ---
 
@@ -1196,10 +1196,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1224,6 +1224,7 @@ subroutine impose_orthog_svd(n, m, C)
   if(num_linear_dependencies > 0) then
     write(*,*) ' linear dependencies = ', num_linear_dependencies
     write(*,*) ' m                   = ', m
+    write(*,*) ' try with Graham-Schmidt'
     stop
   endif
 
@@ -1256,10 +1257,10 @@ subroutine impose_orthog_svd(n, m, C)
             , C, size(C, 1), C, size(C, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   deallocate(S)
 
@@ -1296,10 +1297,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -1358,10 +1359,10 @@ subroutine impose_orthog_svd_overlap(n, m, C, overlap)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' eigenvec overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' eigenvec overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
   deallocate(S)
 
 end subroutine impose_orthog_svd_overlap
@@ -1528,11 +1529,11 @@ subroutine impose_orthog_degen_eigvec(n, e0, C0)
   enddo
 
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -1677,7 +1678,7 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
   double precision                :: accu_d, accu_nd, s_tmp
   double precision, allocatable   :: S(:,:)
 
-  print *, ' check bi-orthonormality'
+  !print *, ' check bi-orthonormality'
 
   ! ---
 
@@ -1714,15 +1715,19 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print*, '    diag acc: ', accu_d
-  print*, ' nondiag acc: ', accu_nd
+  !print*, '    diag acc bef = ', accu_d
+  !print*, ' nondiag acc bef = ', accu_nd
 
   ! ---
 
   if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
 
     do i = 1, m
-      print *, i, S(i,i)
+      if(S(i,i) <= 0.d0) then
+        print *, ' overap negative'
+        print *, i, S(i,i)
+        exit
+      endif
       if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
         s_tmp = 1.d0 / dsqrt(S(i,i))
         do j = 1, n
@@ -1757,8 +1762,8 @@ subroutine check_biorthog_binormalize(n, m, Vl, Vr, thr_d, thr_nd, stop_ifnot)
     enddo
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
-  print *, '    diag acc: ', accu_d
-  print *, ' nondiag acc: ', accu_nd
+  !print *, '    diag acc aft = ', accu_d
+  !print *, ' nondiag acc aft = ', accu_nd
 
   deallocate(S)
 
@@ -1801,10 +1806,10 @@ subroutine check_weighted_biorthog(n, m, W, Vl, Vr, thr_d, thr_nd, accu_d, accu_
             , 0.d0, S, size(S, 1) )
   deallocate(tmp)
 
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1852,17 +1857,18 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   integer                       :: i, j
   double precision, allocatable :: SS(:,:)
 
-  print *, ' check bi-orthogonality'
+  !print *, ' check bi-orthogonality'
 
   ! ---
 
   call dgemm( 'T', 'N', m, m, n, 1.d0          &
             , Vl, size(Vl, 1), Vr, size(Vr, 1) &
             , 0.d0, S, size(S, 1) )
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
+
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1877,12 +1883,12 @@ subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, thr_d, thr_nd, stop_
   enddo
   accu_nd = dsqrt(accu_nd) / dble(m)
 
-  print *, ' accu_nd = ', accu_nd
-  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+  !print *, ' accu_nd = ', accu_nd
+  !print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
 
   ! ---
 
-  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+  if(stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) then
     print *, ' non bi-orthogonal vectors !'
     print *, ' accu_nd = ', accu_nd
     print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
@@ -1912,12 +1918,12 @@ subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
             , V, size(V, 1), V, size(V, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ''
-  print *, ' overlap matrix:'
-  do i = 1, m
-    write(*,'(1000(F16.10,X))') S(i,:)
-  enddo
-  print *, ''
+  !print *, ''
+  !print *, ' overlap matrix:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+  !print *, ''
 
   accu_d  = 0.d0
   accu_nd = 0.d0
@@ -1981,11 +1987,11 @@ subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i), e0(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i), e0(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2181,11 +2187,11 @@ subroutine impose_unique_biorthog_degen_eigvec(n, thr_d, thr_nd, e0, C0, W0, L0,
     enddo
   enddo
   
-  do i = 1, n
-    if(deg_num(i).gt.1) then
-      print *, ' degen on', i, deg_num(i)
-    endif
-  enddo
+  !do i = 1, n
+  !  if(deg_num(i) .gt. 1) then
+  !    print *, ' degen on', i, deg_num(i)
+  !  endif
+  !enddo
 
   ! ---
 
@@ -2414,10 +2420,10 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2489,10 +2495,11 @@ subroutine impose_biorthog_svd(n, m, L, R)
             , L, size(L, 1), R, size(R, 1) &
             , 0.d0, S, size(S, 1) )
 
-  print *, ' overlap aft SVD: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   ! ---
@@ -2806,10 +2813,10 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap bef SVD: '
-  do i = 1, m
-    write(*, '(1000(F25.16,X))') S(i,:)
-  enddo
+  !print *, ' overlap bef SVD: '
+  !do i = 1, m
+  !  write(*, '(1000(F25.16,X))') S(i,:)
+  !enddo
 
   ! ---
  
@@ -2886,10 +2893,11 @@ subroutine impose_weighted_biorthog_svd(n, m, overlap, L, R)
             , 0.d0, S, size(S, 1) )
   deallocate(Stmp)
 
-  print *, ' overlap aft SVD with overlap: '
-  do i = 1, m
-    write(*, '(1000(F16.10,X))') S(i,:)
-  enddo
+  !print *, ' overlap aft SVD with overlap: '
+  !do i = 1, m
+  !  write(*, '(1000(F16.10,X))') S(i,:)
+  !enddo
+
   deallocate(S)
 
   return
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
index 07ac5917..4dea5f66 100644
--- a/src/non_hermit_dav/new_routines.irp.f
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -132,9 +132,9 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -149,14 +149,14 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
     deallocate(S_nh_inv_half)
     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-     if( accu_nd .lt. 1d-10 ) then
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -200,10 +200,10 @@ subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_ei
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -354,14 +354,14 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
     print *, ' bi-orthogonality: not imposed yet'
-    if(complex_root)then 
+    if(complex_root) then 
      print *, ' '
      print *, ' '
      print *, ' orthog between degen eigenvect' 
@@ -369,9 +369,9 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
      call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
      call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
-     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) 
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S,  thresh_biorthog_diag, thresh_biorthog_nondiag, .false.) 
 
-     if( accu_nd .lt. 1d-10 ) then
+     if(accu_nd .lt. thresh_biorthog_nondiag) then
        print *, ' bi-orthogonality: ok'
      else 
       print*,'New vectors not bi-orthonormals at ', accu_nd
@@ -387,8 +387,8 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
      print*,'S^{-1/2} exists !!'
      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -431,10 +431,10 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
   call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
@@ -472,6 +472,7 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   double precision :: accu,thr_cut
   double precision, allocatable :: S_nh_inv_half(:,:)
   logical :: complex_root
+  double precision :: thr_norm=1d0
 
 
   thr_cut = 1.d-15
@@ -580,9 +581,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
 
 !!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
   !                               check bi-orthogonality
-  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else
     print *, ' '
@@ -593,9 +594,9 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
     print *, ' '
     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
     call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
     print*,'accu_nd = ',accu_nd
-    if( accu_nd .lt. 1d-10 ) then
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -608,8 +609,8 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
      endif
     endif
-    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
-    if( accu_nd .lt. 1d-10 ) then
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
+    if(accu_nd .lt. thresh_biorthog_nondiag) then
       print *, ' bi-orthogonality: ok'
     else 
      print*,'New vectors not bi-orthonormals at ',accu_nd
@@ -651,11 +652,11 @@ subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
   print*,'Checking for final reigvec/leigvec'
   shift_current = max(1.d-10,shift_current)
   print*,'Thr for eigenvectors = ',shift_current
-  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current)
-  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, thresh_biorthog_diag, thresh_biorthog_nondiag, .false.)
   print *, ' accu_nd bi-orthog = ', accu_nd
   
-  if( accu_nd .lt. 1d-10 ) then
+  if(accu_nd .lt. thresh_biorthog_nondiag) then
     print *, ' bi-orthogonality: ok'
   else 
    print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
diff --git a/src/scf_utils/diagonalize_fock.irp.f b/src/scf_utils/diagonalize_fock.irp.f
index 5188581a..008344f2 100644
--- a/src/scf_utils/diagonalize_fock.irp.f
+++ b/src/scf_utils/diagonalize_fock.irp.f
@@ -20,6 +20,12 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
      enddo
    enddo
 
+  !print *, ' Fock_matrix_MO :'
+  !do i = 1, mo_num
+  !  write(*, '(100(f15.7, 2x))') (Fock_matrix_MO(j,i), j = 1, mo_num)
+  !enddo
+
+
    if(frozen_orb_scf)then
      integer                        :: iorb,jorb
      do i = 1, n_core_orb
@@ -57,7 +63,6 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    do i = elec_beta_num+1, elec_alpha_num
      F(i,i) += 0.5d0*level_shift
    enddo
-
    do i = elec_alpha_num+1, mo_num
      F(i,i) += level_shift
    enddo
@@ -90,6 +95,10 @@ BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_num)
    call dsyevd( 'V', 'U', mo_num, F,                             &
        size(F,1), diag, work, lwork, iwork, liwork, info)
    deallocate(iwork)
+  !print*, ' Fock eigval:'
+  !do i = 1, mo_num
+  !  print *, diag(i)
+  !enddo
 
 
    if (info /= 0) then
diff --git a/src/scf_utils/diis.irp.f b/src/scf_utils/diis.irp.f
index 713de1b3..63a847ce 100644
--- a/src/scf_utils/diis.irp.f
+++ b/src/scf_utils/diis.irp.f
@@ -1,3 +1,5 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
  implicit none
  BEGIN_DOC
@@ -12,6 +14,8 @@ BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
   implicit none
   BEGIN_DOC
@@ -60,6 +64,8 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
 
 END_PROVIDER
 
+! ---
+
 BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
   implicit none
   begin_doc
@@ -69,6 +75,7 @@ BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_num, mo_num)]
      FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1))
 END_PROVIDER
 
+! ---
 
  BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ]
 &BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ]
@@ -137,3 +144,175 @@ END_PROVIDER
 
 END_PROVIDER
 
+! ---
+
+!BEGIN_PROVIDER [double precision, error_diis_Fmo, (ao_num, ao_num)]
+!
+!  BEGIN_DOC
+!  !
+!  ! error_diis_Fmo = (S x C) x [F_mo x \eta_occ - \eta_occ x F_mo] x (S x C).T
+!  !
+!  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+!  !
+!  END_DOC
+!
+!  implicit none
+!  integer                       :: i, j
+!  double precision, allocatable :: tmp(:,:)
+!
+!  provide Fock_matrix_mo
+!
+!  allocate(tmp(mo_num,mo_num))
+!  tmp = 0.d0
+!
+!  ! F_mo x \eta_occ(alpha) - \eta_occ x F_mo(alpha)
+!  do j = 1, elec_alpha_num
+!    do i = elec_alpha_num + 1, mo_num
+!      tmp(i,j) = Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_alpha_num + 1, mo_num
+!    do i = 1, elec_alpha_num
+!      tmp(i,j) = -Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  ! F_mo x \eta_occ(beta) - \eta_occ x F_mo(beta)
+!  do j = 1, elec_beta_num
+!    do i = elec_beta_num + 1, mo_num
+!      tmp(i,j) += Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!  do j = elec_beta_num + 1, mo_num
+!    do i = 1, elec_beta_num
+!      tmp(i,j) -= Fock_matrix_mo(i,j)
+!    enddo
+!  enddo
+!
+!  call mo_to_ao(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+!  
+!  deallocate(tmp)
+!
+!END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, error_diis_Fmo, (mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Commutator of the MO Fock matrix with the occupation matrix:
+  ! error_diis_Fmo = [F_mo x \eta_occ - \eta_occ x F_mo]
+  !
+  ! \eta_occ is the matrix of occupation : \eta_occ = \eta_occ(alpha) + \eta_occ(beta)
+  ! Only elements (i,j) coupling orbitals on opposite sides of elec_alpha_num / elec_beta_num are filled; the rest stay 0.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j
+  double precision, allocatable :: tmp(:,:) ! only referenced by the commented-out AO round-trip at the end
+
+  provide Fock_matrix_mo
+
+  error_diis_Fmo = 0.d0
+
+  ! alpha part: +F(i,j) for i > elec_alpha_num >= j, -F(i,j) for i <= elec_alpha_num < j
+  do j = 1, elec_alpha_num
+    do i = elec_alpha_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_alpha_num + 1, mo_num
+    do i = 1, elec_alpha_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  ! beta part: same pattern with elec_beta_num as the boundary, accumulated on top of the alpha part
+  do j = 1, elec_beta_num
+    do i = elec_beta_num + 1, mo_num
+      error_diis_Fmo(i,j) += Fock_matrix_mo(i,j)
+    enddo
+  enddo
+  do j = elec_beta_num + 1, mo_num
+    do i = 1, elec_beta_num
+      error_diis_Fmo(i,j) -= Fock_matrix_mo(i,j)
+    enddo
+  enddo
+
+  !allocate(tmp(ao_num,ao_num))
+  !call mo_to_ao(error_diis_Fmo, size(error_diis_Fmo, 1), tmp, size(tmp, 1))
+  !call ao_to_mo(tmp, size(tmp, 1), error_diis_Fmo, size(error_diis_Fmo, 1))
+  !deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_a, (AO_num, AO_num)]
+  ! Alpha-spin matrix F x P x S - S x P x F with F = Fock_Matrix_AO_alpha, P = SCF_density_matrix_ao_alpha, S = AO_Overlap
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+  ! scratch = F x P
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                                 &
+            , Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1), SCF_density_matrix_ao_alpha, size(SCF_Density_Matrix_AO_alpha, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+  ! FPS_SPF_Matrix_AO_a = scratch x S  (= F x P x S)
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+  ! scratch = S x P
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                             &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+  ! FPS_SPF_Matrix_AO_a = FPS_SPF_Matrix_AO_a - scratch x F  (scale factors -1.d0 and 1.d0 accumulate into the result)
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                        &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_alpha, size(Fock_Matrix_AO_alpha, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO_b, (AO_num, AO_num)]
+  ! Beta-spin matrix F x P x S - S x P x F with F = Fock_Matrix_AO_beta, P = SCF_density_matrix_ao_beta, S = AO_Overlap
+  implicit none
+  double precision, allocatable  :: scratch(:,:)
+
+  allocate(scratch(AO_num, AO_num))
+  ! scratch = F x P
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                                             &
+            , Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1), SCF_density_matrix_ao_beta, size(SCF_Density_Matrix_AO_beta, 1) &
+            , 0.d0, scratch, size(scratch, 1) )
+  ! FPS_SPF_Matrix_AO_b = scratch x S  (= F x P x S)
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                     &
+            , scratch, size(scratch, 1), AO_Overlap, size(AO_Overlap, 1) &
+            , 0.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+  ! scratch = S x P
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, 1.d0                                                           &
+            , AO_Overlap, size(AO_Overlap, 1), SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta, 1) & 
+            , 0.d0, scratch, size(scratch, 1) )
+  ! FPS_SPF_Matrix_AO_b = FPS_SPF_Matrix_AO_b - scratch x F  (scale factors -1.d0 and 1.d0 accumulate into the result)
+  call dgemm( 'N', 'N', AO_num, AO_num, AO_num, -1.d0                                      &
+            , scratch, size(scratch, 1), Fock_Matrix_AO_beta, size(Fock_Matrix_AO_beta, 1) &
+            , 1.d0, FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1) )
+
+END_PROVIDER
+
+! ---
+! FPS_SPF_Matrix_AO_a passed through ao_to_mo (presumably the AO -> MO basis change; helper defined elsewhere - TODO confirm)
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_a, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_a, size(FPS_SPF_Matrix_AO_a, 1), FPS_SPF_Matrix_MO_a, size(FPS_SPF_Matrix_MO_a, 1))
+END_PROVIDER
+
+! ---
+! FPS_SPF_Matrix_AO_b passed through ao_to_mo (presumably the AO -> MO basis change; helper defined elsewhere - TODO confirm)
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO_b, (mo_num, mo_num)]
+  implicit none
+  call ao_to_mo(FPS_SPF_Matrix_AO_b, size(FPS_SPF_Matrix_AO_b, 1), FPS_SPF_Matrix_MO_b, size(FPS_SPF_Matrix_MO_b, 1))
+END_PROVIDER
+
+! ---
+
diff --git a/src/scf_utils/fock_matrix.irp.f b/src/scf_utils/fock_matrix.irp.f
index 539f1eb3..baefcd6c 100644
--- a/src/scf_utils/fock_matrix.irp.f
+++ b/src/scf_utils/fock_matrix.irp.f
@@ -267,3 +267,5 @@ BEGIN_PROVIDER [ double precision, SCF_energy ]
 
 END_PROVIDER
 
+! ---
+
diff --git a/src/scf_utils/rh_scf_simple.irp.f b/src/scf_utils/rh_scf_simple.irp.f
new file mode 100644
index 00000000..cd7ba66f
--- /dev/null
+++ b/src/scf_utils/rh_scf_simple.irp.f
@@ -0,0 +1,129 @@
+subroutine Roothaan_Hall_SCF_Simple
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation, plain form: no DIIS extrapolation and no adaptive level shift (that logic is commented out in the loop)
+END_DOC
+
+  implicit none
+
+  integer                        :: iteration_SCF, dim_DIIS
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS
+
+  integer                        :: i,j
+  logical, external              :: qp_stop
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift
+
+  allocate(mo_coef_save(ao_num,mo_num))
+
+  ! dim_DIIS stays 0 here (only printed in the table); mo_coef_save is only used by the commented-out level-shift loop
+  dim_DIIS     = 0
+  mo_coef_save = 0.d0
+
+  call write_time(6)
+
+  print*,'energy of the guess = ',SCF_energy
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+
+! Initialize the reference energy, the energy change, the iteration counter and the error measure
+  energy_SCF_previous = SCF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  max_error_DIIS      = 1.d0
+  ! Loop until the error and |energy change| are both within their thresholds, or n_it_SCF_max iterations are done
+  do while ( &
+    ( (max_error_DIIS > threshold_DIIS_nonzero) .or. &
+      (dabs(Delta_energy_SCF) > thresh_SCF) &
+    ) .and. (iteration_SCF < n_it_SCF_max) )
+
+    iteration_SCF += 1
+    if(frozen_orb_scf)then
+     call initialize_mo_coef_begin_iteration
+    endif
+    ! Replace the orbitals by eigenvectors_Fock_matrix_MO, then TOUCH so dependent quantities are rebuilt
+    MO_coef = eigenvectors_Fock_matrix_MO
+    if(frozen_orb_scf)then
+     call reorder_core_orb
+     call initialize_mo_coef_begin_iteration
+    endif
+    TOUCH MO_coef
+
+!   Error measure: largest absolute element of FPS_SPF_Matrix_MO
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy
+
+    energy_SCF = SCF_energy
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+
+    !double precision :: level_shift_save
+    !level_shift_save = level_shift
+    !mo_coef_save(1:ao_num,1:mo_num) = mo_coef(1:ao_num,1:mo_num)
+    !do while (Delta_energy_SCF > 0.d0)
+    !  mo_coef(1:ao_num,1:mo_num) = mo_coef_save
+    !  if (level_shift <= .1d0) then
+    !    level_shift = 1.d0
+    !  else
+    !    level_shift = level_shift * 3.0d0
+    !  endif
+    !  TOUCH mo_coef level_shift
+    !  mo_coef(1:ao_num,1:mo_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_num)
+    !  if(frozen_orb_scf)then
+    !    call reorder_core_orb
+    !    call initialize_mo_coef_begin_iteration
+    !  endif
+    !  TOUCH mo_coef
+    !  Delta_energy_SCF = SCF_energy - energy_SCF_previous
+    !  energy_SCF = SCF_energy
+    !  if (level_shift-level_shift_save > 40.d0) then
+    !    level_shift = level_shift_save * 4.d0
+    !    SOFT_TOUCH level_shift
+    !    exit
+    !  endif
+    !enddo
+    !level_shift = level_shift * 0.5d0
+    !SOFT_TOUCH level_shift
+
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, level_shift, dim_DIIS
+    ! Orbitals are saved only when this iteration lowered the energy
+    if(Delta_energy_SCF < 0.d0) then
+      call save_mos()
+    endif
+    if(qp_stop()) exit
+
+  enddo
+  ! The 'Canonical' label is set only when the loop stopped before reaching n_it_SCF_max
+  if (iteration_SCF < n_it_SCF_max) then
+    mo_label = 'Canonical'
+  endif
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================','================'
+  write(6,*)
+
+  if(.not.frozen_orb_scf)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      size(Fock_matrix_mo,2),mo_label,1,.true.)
+   call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+   call orthonormalize_mos
+   call save_mos
+  endif
+
+  call write_double(6, energy_SCF, 'SCF energy')
+
+  call write_time(6)
+
+end
+
diff --git a/src/scf_utils/roothaan_hall_scf.irp.f b/src/scf_utils/roothaan_hall_scf.irp.f
index 2c35fe0d..9ec61ced 100644
--- a/src/scf_utils/roothaan_hall_scf.irp.f
+++ b/src/scf_utils/roothaan_hall_scf.irp.f
@@ -29,11 +29,11 @@ END_DOC
 
   call write_time(6)
 
-  print*,'Energy of the guess = ',SCF_energy
+  print*,'energy of the guess = ',SCF_energy
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
-    '  N ', 'Energy  ', 'Energy diff  ',  'DIIS error  ', 'Level shift   '
+    '  N ', 'energy  ', 'energy diff  ',  'DIIS error  ', 'Level shift   '
   write(6,'(A4, 1X, A16, 1X, A16, 1X, A16, 1X, A16)')  &
     '====','================','================','================','================'
 
@@ -66,7 +66,8 @@ END_DOC
 
     dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
 
-    if ( (scf_algorithm == 'DIIS').and.(dabs(Delta_energy_SCF) > 1.d-6) )  then
+    if( (scf_algorithm == 'DIIS') .and. (dabs(Delta_energy_SCF) > 1.d-6))  then
+    !if(scf_algorithm == 'DIIS') then
 
       ! Store Fock and error matrices at each iteration
       index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
@@ -85,10 +86,9 @@ END_DOC
           iteration_SCF,dim_DIIS                                       &
           )
 
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
-
     endif
 
     MO_coef = eigenvectors_Fock_matrix_MO
@@ -99,18 +99,14 @@ END_DOC
 
     TOUCH MO_coef
 
-!   Calculate error vectors
-
-    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
-
 !   SCF energy
 
     energy_SCF = SCF_energy
-    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
-    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+    Delta_energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_energy_SCF > 0.d0) ) then
       Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
-      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
-      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_alpha = Fock_matrix_AO!*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO!*0.5d0
       TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
     endif
 
@@ -131,19 +127,24 @@ END_DOC
         call initialize_mo_coef_begin_iteration
       endif
       TOUCH mo_coef
-      Delta_Energy_SCF = SCF_energy - energy_SCF_previous
+      Delta_energy_SCF = SCF_energy - energy_SCF_previous
       energy_SCF = SCF_energy
       if (level_shift-level_shift_save > 40.d0) then
         level_shift = level_shift_save * 4.d0
         SOFT_TOUCH level_shift
         exit
       endif
+
       dim_DIIS=0
     enddo
+
     level_shift = level_shift * 0.5d0
     SOFT_TOUCH level_shift
     energy_SCF_previous = energy_SCF
 
+!   Calculate error vectors
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
 !   Print results at the end of each iteration
 
     write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
@@ -175,7 +176,7 @@ END_DOC
    call save_mos
   endif
 
-  call write_double(6, Energy_SCF, 'SCF energy')
+  call write_double(6, energy_SCF, 'SCF energy')
 
   call write_time(6)
 
diff --git a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
index 60201f5f..eb812401 100644
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.f
@@ -14,21 +14,36 @@ program save_bitcpsileft_for_qmcchem
 
   e_ref = 0.d0
   iunit = 13
-  open(unit=iunit,file=trim(ezfio_filename)//'/simulation/e_ref',action='write')
-  call ezfio_has_fci_energy_pt2(exists)
-
-  if(.not.exists) then
-    call ezfio_has_fci_energy(exists)
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
 
+    call ezfio_has_fci_energy_pt2(exists)
     if(.not.exists) then
-      call ezfio_has_tc_scf_bitc_energy(exists)
-      if(exists) then
-        call ezfio_get_tc_scf_bitc_energy(e_ref)
+
+      call ezfio_has_fci_energy(exists)
+      if(.not.exists) then
+
+        call ezfio_has_cisd_energy(exists)
+        if(.not.exists) then
+
+          call ezfio_has_tc_scf_bitc_energy(exists)
+          if(exists) then
+            call ezfio_get_tc_scf_bitc_energy(e_ref)
+          endif
+
+        else
+          call ezfio_get_cisd_energy(e_ref)
+        endif
+
+      else
+        call ezfio_get_fci_energy(e_ref)
       endif
+
+      else
+        call ezfio_get_fci_energy_pt2(e_ref)
     endif
 
-  endif
-  write(iunit,*) e_ref
+    write(iunit,*) e_ref
+
   close(iunit)
 
 end
diff --git a/src/tc_bi_ortho/tc_som.irp.f b/src/tc_bi_ortho/tc_som.irp.f
new file mode 100644
index 00000000..291c52ef
--- /dev/null
+++ b/src/tc_bi_ortho/tc_som.irp.f
@@ -0,0 +1,70 @@
+! ---
+
+program tc_som
+
+  BEGIN_DOC
+  ! Prints U_SOM = 0.5 x sum over determinants I /= HF of htilde(HF,I) x htilde(I,HF), computed in subroutine main on the read wave function
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+  print *, ' do not forget to do tc-scf first'
+  ! Override the grid settings before they are used (r/a presumably radial/angular point counts - TODO confirm)
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+  ! NOTE(review): read_wf presumably makes psi_det come from the stored wave function - confirm
+  read_wf = .true.
+  touch read_wf
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+  ! Prints U_SOM = 0.5 * sum_{i /= i_HF} htot_1 * htot_2, where htot_1 / htot_2 are the total elements returned by
+  ! htilde_mu_mat_bi_ortho for (HF, i) and (i, HF); i_HF is the first determinant of psi_det at excitation degree 0 from HF_bitmask.
+  implicit none
+  integer          :: i, i_HF, degree
+  double precision :: hmono_1, htwoe_1, hthree_1, htot_1
+  double precision :: hmono_2, htwoe_2, hthree_2, htot_2
+  double precision :: U_SOM
+
+  PROVIDE N_int N_det
+  i_HF = 0 ! sentinel: stays 0 when no determinant of psi_det matches HF_bitmask
+  do i = 1, N_det
+    call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int)
+    if(degree == 0) then
+      i_HF = i
+      exit
+    endif
+  enddo
+  print *, ' HF determinants:', i_HF
+  print *, '          N_det :', N_det
+  if(i_HF == 0) stop ' HF determinant not found in psi_det' ! avoid indexing psi_det with an undefined i_HF
+
+  U_SOM = 0.d0 
+  do i = 1, N_det
+    if(i == i_HF) cycle
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    U_SOM += htot_1 * htot_2
+  enddo
+  U_SOM = 0.5d0 * U_SOM
+  print *, ' U_SOM = ', U_SOM
+end subroutine main
+
+! ---
+
diff --git a/src/tc_bi_ortho/test_tc_fock.irp.f b/src/tc_bi_ortho/test_tc_fock.irp.f
index a49a5958..ebd43a7a 100644
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@@ -15,7 +15,8 @@ program test_tc_fock
   !call routine_2
 !  call routine_3()
 
- call test_3e
+! call test_3e
+ call routine_tot
 end
 
 ! ---
@@ -32,7 +33,7 @@ subroutine test_3e
  print*,'htot  = ',htot
  print*,''
  print*,''
- print*,'TC_one= ',TC_HF_one_electron_energy
+ print*,'TC_one= ',tc_hf_one_e_energy
  print*,'TC_two= ',TC_HF_two_e_energy
  print*,'TC_3e = ',diag_three_elem_hf
  print*,'TC_tot= ',TC_HF_energy
@@ -84,8 +85,8 @@ subroutine routine_3()
         print*, i, a
         stop
        endif
-       !print*, ' excited det'
-       !call debug_det(det_i, N_int)
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
  
        call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
        if(dabs(hthree).lt.1.d-10)cycle
@@ -116,3 +117,78 @@ subroutine routine_3()
 end subroutine routine_3
 
 ! ---
+subroutine routine_tot()
+  ! For each single excitation i -> a of spin s1 applied to ref_bitmask, compares |htilde_ij| (htilde_mu_mat_bi_ortho)
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+  ! with |Fock_matrix_tc_mo_alpha/beta(a,i)| and accumulates the absolute differences in err_tot.
+  implicit none
+  integer                        :: i, a, i_ok, s1,other_spin(2)
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  err_tot = 0.d0
+  ! Currently restricted to s1 = 2 with i in 1..elec_beta_num and a in elec_beta_num+1..elec_alpha_num
+!  do s1 = 1, 2
+   s1 = 2
+   det_i = ref_bitmask
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+       ! new: Fock matrix element of the excited spin; ref: matrix element between excited and reference determinants
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_tot
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index e506d1cc..eb8fa8be 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -86,7 +86,7 @@ default: False
 type: Threshold
 doc: Threshold on the convergence of the Hartree Fock energy.
 interface: ezfio,provider,ocaml
-default: 1.e-10
+default: 1.e-12
 
 [n_it_tcscf_max]
 type: Strictly_positive_int
@@ -134,5 +134,53 @@ default: False
 type: integer
 doc: nb of Gaussians used to fit Jastrow fcts
 interface: ezfio,provider,ocaml
-default: 6
+default: 20
+
+[max_dim_diis_tcscf]
+type: integer
+doc: Maximum size of the DIIS extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis_tcscf]
+type: Threshold
+doc: Threshold on the convergence of the DIIS error vector during a TCSCF calculation. If 0. is chosen, the square root of thresh_tcscf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[level_shift_tcscf]
+type: Positive_float
+doc: Energy shift on the virtual MOs to improve TCSCF convergence
+interface: ezfio,provider,ocaml
+default: 0.
+
+[tcscf_algorithm]
+type: character*(32)
+doc: Type of TCSCF algorithm used. Possible choices are [Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: Simple 
+
+[im_thresh_tcscf]
+type: Threshold
+doc: Thresholds on the Imag part of energy
+interface: ezfio,provider,ocaml
+default: 1.e-7
+
+[test_cycle_tc]
+type: logical
+doc: If |true|, the integrals of the three-body jastrow are computed with cycles
+interface: ezfio,provider,ocaml
+default: False
+
+[thresh_biorthog_diag]
+type: Threshold
+doc: Threshold to determine if diagonal elements of the bi-orthogonal condition L.T x R are close enough to 1
+interface: ezfio,provider,ocaml
+default: 1.e-6
+
+[thresh_biorthog_nondiag]
+type: Threshold
+doc: Threshold to determine if non-diagonal elements of L.T x R are close enough to 0
+interface: ezfio,provider,ocaml
+default: 1.e-6
 
diff --git a/src/tc_scf/diago_bi_ort_tcfock.irp.f b/src/tc_scf/diago_bi_ort_tcfock.irp.f
index 856b7382..726169d9 100644
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@@ -1,3 +1,5 @@
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_mo, (mo_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, eigval_fock_tc_mo, (mo_num)]
@@ -9,32 +11,46 @@
 
   implicit none
   integer                       :: n_real_tc 
-  integer                       :: i, k, l
+  integer                       :: i, j, k, l
   double precision              :: accu_d, accu_nd, accu_tmp
-  double precision              :: thr_d, thr_nd
   double precision              :: norm
   double precision, allocatable :: eigval_right_tmp(:)
+  double precision, allocatable :: F_tmp(:,:)
 
-  thr_d  = 1d-6
-  thr_nd = 1d-6
-
-  allocate( eigval_right_tmp(mo_num) )
+  allocate( eigval_right_tmp(mo_num), F_tmp(mo_num,mo_num) )
 
   PROVIDE Fock_matrix_tc_mo_tot
 
-  call non_hrmt_bieig( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo       & 
+  do i = 1, mo_num
+    do j = 1, mo_num
+      F_tmp(j,i) = Fock_matrix_tc_mo_tot(j,i)
+    enddo
+  enddo
+  ! insert level shift here
+  do i = elec_beta_num+1, elec_alpha_num
+    F_tmp(i,i) += 0.5d0 * level_shift_tcscf
+  enddo
+  do i = elec_alpha_num+1, mo_num
+    F_tmp(i,i) += level_shift_tcscf
+  enddo
+
+  call non_hrmt_bieig( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
                      , n_real_tc, eigval_right_tmp )
+
   !if(max_ov_tc_scf)then
-  ! call non_hrmt_fock_mat( mo_num, Fock_matrix_tc_mo_tot, thr_d, thr_nd &
-  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
+  ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
+  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo                          & 
   !                    , n_real_tc, eigval_right_tmp )
   !else 
-  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, Fock_matrix_tc_mo_tot &
+  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
   !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
   !                    , n_real_tc, eigval_right_tmp )
   !endif
 
+  deallocate(F_tmp)
+
+
 !  if(n_real_tc .ne. mo_num)then
 !   print*,'n_real_tc ne mo_num ! ',n_real_tc
 !   stop
@@ -42,9 +58,12 @@
 
   eigval_fock_tc_mo = eigval_right_tmp
 !  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
-!  do i = 1, mo_num
+!  do i = 1, elec_alpha_num
 !    print*, i, eigval_fock_tc_mo(i)
 !  enddo
+!  do i = elec_alpha_num+1, mo_num 
+!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
+!  enddo
 !  deallocate( eigval_right_tmp )
 
   ! L.T x R 
@@ -53,6 +72,8 @@
             , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
             , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
 
+  ! ---
+
   accu_d  = 0.d0
   accu_nd = 0.d0
   do i = 1, mo_num
@@ -63,45 +84,80 @@
       else
         accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
         accu_nd += accu_tmp * accu_tmp
-        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thr_nd)then
+        if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
          print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
         endif
       endif
     enddo 
   enddo
-  accu_nd = dsqrt(accu_nd)/accu_d
-
-  if(accu_nd .gt. thr_nd) then
+  accu_nd = dsqrt(accu_nd) / accu_d
+  if(accu_nd .gt. thresh_biorthog_nondiag) then
     print *, ' bi-orthog failed'
-    print*,'accu_nd MO = ', accu_nd, thr_nd
-    print*,'overlap_fock_tc_eigvec_mo = '
+    print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+    print *, ' overlap_fock_tc_eigvec_mo = '
     do i = 1, mo_num
       write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
     enddo
-   stop
+    stop
   endif
 
-  if( dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thr_d ) then
-    print *, 'mo_num     = ', mo_num 
-    print *, 'accu_d  MO = ', accu_d, thr_d
-    print *, 'normalizing vectors ...'
+  ! ---
+
+  if(dabs(accu_d - dble(mo_num))/dble(mo_num) .gt. thresh_biorthog_diag) then
+
+    print *, ' mo_num     = ', mo_num 
+    print *, ' accu_d  MO = ', accu_d, thresh_biorthog_diag
+    print *, ' normalizing vectors ...'
     do i = 1, mo_num
       norm = dsqrt(dabs(overlap_fock_tc_eigvec_mo(i,i)))
-      if(norm .gt. thr_d) then
+      if(norm .gt. thresh_biorthog_diag) then
         do k = 1, mo_num
           fock_tc_reigvec_mo(k,i) *= 1.d0/norm
           fock_tc_leigvec_mo(k,i) *= 1.d0/norm
         enddo
       endif
     enddo
+
     call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
               , fock_tc_leigvec_mo, size(fock_tc_leigvec_mo, 1) &
               , fock_tc_reigvec_mo, size(fock_tc_reigvec_mo, 1) &
               , 0.d0, overlap_fock_tc_eigvec_mo, size(overlap_fock_tc_eigvec_mo, 1) )
+
+    accu_d  = 0.d0   ! both measures are rebuilt from the overlap matrix recomputed just above
+    accu_nd = 0.d0
+    do i = 1, mo_num
+      do k = 1, mo_num
+        if(i==k) then
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_d  += dabs(accu_tmp)
+        else
+          accu_tmp = overlap_fock_tc_eigvec_mo(k,i)
+          accu_nd += accu_tmp * accu_tmp
+          if(dabs(overlap_fock_tc_eigvec_mo(k,i)) .gt. thresh_biorthog_nondiag)then
+           print *, 'k,i', k, i, overlap_fock_tc_eigvec_mo(k,i)
+          endif
+        endif
+      enddo 
+    enddo
+    accu_nd = dsqrt(accu_nd) / accu_d   ! sqrt(sum of squared off-diagonal terms) / sum of |diagonal terms|
+    if(accu_nd .gt. thresh_biorthog_nondiag) then   ! off-diagonal criterion: same threshold as the check before normalization and as the value printed below
+      print *, ' bi-orthog failed'
+      print *, ' accu_nd MO = ', accu_nd, thresh_biorthog_nondiag
+      print *, ' overlap_fock_tc_eigvec_mo = '
+      do i = 1, mo_num
+        write(*,'(100(F16.10,X))') overlap_fock_tc_eigvec_mo(i,:)
+      enddo
+      stop
+    endif
+
   endif
  
+  ! ---
+
 END_PROVIDER 
 
+! ---
+
  BEGIN_PROVIDER [ double precision, fock_tc_reigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, fock_tc_leigvec_ao, (ao_num, mo_num)]
 &BEGIN_PROVIDER [ double precision, overlap_fock_tc_eigvec_ao, (mo_num, mo_num) ]
@@ -117,6 +173,7 @@ END_PROVIDER
   double precision              :: accu, accu_d
   double precision, allocatable :: tmp(:,:)
 
+  PROVIDE mo_l_coef mo_r_coef
 
 !  ! MO_R x R
    call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
diff --git a/src/tc_scf/diis_tcscf.irp.f b/src/tc_scf/diis_tcscf.irp.f
new file mode 100644
index 00000000..ff1077f5
--- /dev/null
+++ b/src/tc_scf/diis_tcscf.irp.f
@@ -0,0 +1,186 @@
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero_TCSCF ]
+  ! Value is threshold_DIIS_TCSCF unless that is exactly 0.d0, in which case dsqrt(thresh_tcscf) is used.
+  implicit none
+  ! NOTE(review): the exact == 0.d0 test presumably treats zero as "unset" - confirm against the input definition.
+  if(threshold_DIIS_TCSCF == 0.d0) then
+    threshold_DIIS_nonzero_TCSCF = dsqrt(thresh_tcscf)
+  else
+    threshold_DIIS_nonzero_TCSCF = threshold_DIIS_TCSCF
+  endif
+  ASSERT(threshold_DIIS_nonzero_TCSCF >= 0.d0)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, Q_alpha, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! AO-basis matrix built from the first elec_alpha_num columns of mo_r_coef and mo_l_coef:
+  ! Q_alpha = mo_r_coef x eta_occ_alpha x mo_l_coef.T
+  !
+  ! [Q_alpha]_ij = \sum_{k=1}^{elec_alpha_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_alpha = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_alpha_num, 1.d0               &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_alpha, size(Q_alpha, 1) )
+
+END_PROVIDER
+  
+! ---
+    
+BEGIN_PROVIDER [ double precision, Q_beta, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  ! AO-basis matrix built from the first elec_beta_num columns of mo_r_coef and mo_l_coef:
+  ! Q_beta = mo_r_coef x eta_occ_beta x mo_l_coef.T
+  !
+  ! [Q_beta]_ij = \sum_{k=1}^{elec_beta_num} [mo_r_coef]_ik [mo_l_coef]_jk
+  !
+  END_DOC
+
+  implicit none
+
+  Q_beta = 0.d0
+  call dgemm( 'N', 'T', ao_num, ao_num, elec_beta_num, 1.d0                &
+            , mo_r_coef, size(mo_r_coef, 1), mo_l_coef, size(mo_l_coef, 1) &
+            , 0.d0, Q_beta, size(Q_beta, 1) )
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Q_matrix, (ao_num, ao_num) ]
+
+  BEGIN_DOC
+  !
+  ! Q_matrix = 2 mo_r_coef x eta_occ x mo_l_coef.T
+  !
+  ! with: 
+  !                        | 1   if i = j = 1, ..., nb of occ orbitals
+  !        [eta_occ]_ij =  |
+  !                        | 0   otherwise
+  !
+  ! the DIIS error is defined as:
+  !                         e = F_ao x Q x ao_overlap - ao_overlap x Q x F_ao
+  ! with: 
+  !       mo_l_coef.T x ao_overlap x mo_r_coef = I
+  !       F_mo = mo_l_coef.T x F_ao x mo_r_coef
+  !       F_ao = (ao_overlap x mo_r_coef) x F_mo x (ao_overlap x mo_l_coef).T
+  !
+  ! ==> e = 2 ao_overlap x mo_r_coef x [ F_mo x eta_occ - eta_occ x F_mo ] x (ao_overlap x mo_l_coef).T
+  !
+  !      at convergence:
+  !                                      F_mo x eta_occ - eta_occ x F_mo = 0
+  !                                  ==> [F_mo]_ij ([eta_occ]_ii - [eta_occ]_jj) = 0
+  !                                  ==> [F_mo]_ia = [F_mo]_ai = 0 where: i = occ and a = vir
+  !                                  ==> Brillouin conditions
+  !
+  END_DOC
+
+  implicit none
+  ! equal alpha/beta electron counts: 2 x Q_alpha; otherwise the sum Q_alpha + Q_beta
+  if(elec_alpha_num == elec_beta_num) then
+    Q_matrix = Q_alpha + Q_alpha
+  else
+    Q_matrix = Q_alpha + Q_beta
+  endif
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_ao, (ao_num, ao_num)]
+  ! AO-basis matrix F x Q x S - S x Q x F with F = Fock_matrix_tc_ao_tot, Q = Q_matrix, S = ao_overlap.
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+  ! tmp holds the intermediate product F x Q first, then S x Q
+  allocate(tmp(ao_num,ao_num))
+
+  ! F x Q
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                             &
+            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0             &
+            , tmp, size(tmp, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  ! S x Q
+  tmp = 0.d0
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                       &
+            , ao_overlap, size(ao_overlap, 1), Q_matrix, size(Q_matrix, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  ! F x Q x S - S x Q x F  (alpha = -1, beta = 1: the product is subtracted from the stored F x Q x S)
+  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, -1.d0                                  &
+            , tmp, size(tmp, 1), Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+            , 1.d0, FQS_SQF_ao, size(FQS_SQF_ao, 1) )
+
+  deallocate(tmp)
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, FQS_SQF_mo, (mo_num, mo_num)]
+  ! FQS_SQF_ao passed through ao_to_mo_bi_ortho; the result is stored as an (mo_num, mo_num) array.
+  implicit none
+
+  call ao_to_mo_bi_ortho( FQS_SQF_ao, size(FQS_SQF_ao, 1) &
+                        , FQS_SQF_mo, size(FQS_SQF_mo, 1) )
+
+END_PROVIDER
+
+! ---
+
+! BEGIN_PROVIDER [ double precision, eigenval_Fock_tc_ao, (ao_num) ]
+!&BEGIN_PROVIDER [ double precision, eigenvec_Fock_tc_ao, (ao_num,ao_num) ]
+!
+!  BEGIN_DOC
+!  !
+!  ! Eigenvalues and eigenvectors of the Fock matrix over the ao basis
+!  !
+!  ! F' = X.T x F x X   where X = ao_overlap^(-1/2)
+!  ! 
+!  ! F'   x Cr' = Cr' x E ==> F   Cr = Cr x E with Cr = X x Cr'
+!  ! F'.T x Cl' = Cl' x E ==> F.T Cl = Cl x E with Cl = X x Cl'
+!  !
+!  END_DOC
+!
+!  implicit none
+!  double precision, allocatable :: tmp1(:,:), tmp2(:,:)
+!
+!  ! ---
+!  ! Fock matrix in orthogonal basis: F' = X.T x F x X
+!
+!  allocate(tmp1(ao_num,ao_num))
+!  call dgemm( 'N', 'N', ao_num, ao_num, ao_num, 1.d0                                                 &
+!            , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), S_half_inv, size(S_half_inv, 1) &
+!            , 0.d0, tmp1, size(tmp1, 1) )
+!
+!  allocate(tmp2(ao_num,ao_num))
+!  call dgemm( 'T', 'N', ao_num, ao_num, ao_num, 1.d0               &
+!            , S_half_inv, size(S_half_inv, 1), tmp1, size(tmp1, 1) &
+!            , 0.d0, tmp2, size(tmp2, 1) )
+!
+!  ! ---
+!
+!  ! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+!  ! TODO
+!
+!  ! Back-transform eigenvectors: C =X.C'
+!
+!END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
new file mode 100644
index 00000000..fccfd837
--- /dev/null
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@@ -0,0 +1,405 @@
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_cs, (mo_num, mo_num)]
+  ! For every MO pair (b,a): minus one half of a weighted sum of six 3-body integrals, summed over i,j = 1..elec_beta_num.
+  implicit none
+  integer          :: a, b, i, j
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_cs ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_mo_cs = 0.d0
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+   
+      do j = 1, elec_beta_num
+        do i = 1, elec_beta_num
+          ! first three indices stay (b,i,j); the last three run over the six orderings of (a,i,j)
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_cs(b,a) -= 0.5d0 * ( 4.d0 * I_bij_aij &
+                                            +        I_bij_ija &
+                                            +        I_bij_jai &
+                                            - 2.d0 * I_bij_aji &
+                                            - 2.d0 * I_bij_iaj &
+                                            - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_cs =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]
+  ! Starts from fock_3e_uhf_mo_cs and subtracts, for every MO pair (b,a), three sums that involve orbitals elec_beta_num+1..elec_alpha_num.
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_a ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+  ! o is the first index of the range o..elec_alpha_num used by the loops below
+  fock_3e_uhf_mo_a = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! --- j in o..elec_alpha_num, i in 1..elec_beta_num
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj &
+                                           - 2.d0 * I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- j in 1..elec_beta_num, i in o..elec_alpha_num
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           +        I_bij_ija &
+                                           +        I_bij_jai &
+                                           -        I_bij_aji &
+                                           - 2.d0 * I_bij_iaj &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- i and j both in o..elec_alpha_num
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          call give_integrals_3_body_bi_ort(b, i, j, i, j, a, I_bij_ija)
+          call give_integrals_3_body_bi_ort(b, i, j, j, a, i, I_bij_jai)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_a(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           + I_bij_ija &
+                                           + I_bij_jai &
+                                           - I_bij_aji &
+                                           - I_bij_iaj &
+                                           - I_bij_jia )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+  ! Starts from fock_3e_uhf_mo_cs and subtracts, for every MO pair (b,a), three sums that involve orbitals elec_beta_num+1..elec_alpha_num.
+  implicit none
+  integer          :: a, b, i, j, o
+  double precision :: I_bij_aij, I_bij_aji, I_bij_iaj, I_bij_jia
+  double precision :: ti, tf
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' PROVIDING fock_3e_uhf_mo_b ...'
+  call wall_time(ti)
+
+  o = elec_beta_num + 1
+  ! o is the first index of the range o..elec_alpha_num used by the loops below
+  fock_3e_uhf_mo_b = fock_3e_uhf_mo_cs
+
+  do a = 1, mo_num
+    do b = 1, mo_num
+
+      ! --- j in o..elec_alpha_num, i in 1..elec_beta_num
+
+      do j = o, elec_alpha_num
+        do i = 1, elec_beta_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          ! only the three integrals that appear in the expression below are evaluated
+          ! (the ija, jai and jia orderings do not contribute here)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+          call give_integrals_3_body_bi_ort(b, i, j, i, a, j, I_bij_iaj)
+
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_iaj )
+
+        enddo
+      enddo
+
+      ! --- j in 1..elec_beta_num, i in o..elec_alpha_num
+
+      do j = 1, elec_beta_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          ! only the three integrals that appear in the expression below are evaluated
+          ! (the ija, jai and iaj orderings do not contribute here)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+
+          call give_integrals_3_body_bi_ort(b, i, j, j, i, a, I_bij_jia)
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( 2.d0 * I_bij_aij &
+                                           -        I_bij_aji &
+                                           -        I_bij_jia )
+
+        enddo
+      enddo
+
+      ! --- i and j both in o..elec_alpha_num
+
+      do j = o, elec_alpha_num
+        do i = o, elec_alpha_num
+
+          call give_integrals_3_body_bi_ort(b, i, j, a, i, j, I_bij_aij)
+          ! only the two integrals that appear in the expression below are evaluated
+          ! (the ija, jai, iaj and jia orderings do not contribute here)
+          call give_integrals_3_body_bi_ort(b, i, j, a, j, i, I_bij_aji)
+
+
+
+          fock_3e_uhf_mo_b(b,a) -= 0.5d0 * ( I_bij_aij &
+                                           - I_bij_aji )
+
+        enddo
+      enddo
+
+      ! ---
+
+    enddo
+  enddo
+
+  call wall_time(tf)
+  !print *, ' total Wall time for fock_3e_uhf_mo_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_a, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! AO-basis 3-body term built from give_integrals_3_body_bi_ort_ao and the two TCSCF_bi_ort_dm_ao_* matrices.
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_a ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_a = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_a)
+  ! f_tmp is PRIVATE: each thread allocates and fills its own copy, summed into the shared array under CRITICAL below
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_a * dm_dk_a * i_mugd_eknu &
+                                      + dm_ge_a * dm_dk_a * i_mugd_knue &
+                                      - dm_ge_a * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_a * i_mugd_kenu &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_a(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_a =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [double precision, fock_3e_uhf_ao_b, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! AO-basis 3-body term built from give_integrals_3_body_bi_ort_ao and the two TCSCF_bi_ort_dm_ao_* matrices.
+  ! Equations (B6) and (B7)
+  !
+  ! g <--> gamma
+  ! d <--> delta
+  ! e <--> eta
+  ! k <--> kappa
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: g, d, e, k, mu, nu
+  double precision              :: dm_ge_a, dm_ge_b, dm_ge
+  double precision              :: dm_dk_a, dm_dk_b, dm_dk
+  double precision              :: i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu
+  double precision              :: ti, tf
+  double precision, allocatable :: f_tmp(:,:)
+
+  print *, ' PROVIDING fock_3e_uhf_ao_b ...'
+  call wall_time(ti)
+
+  fock_3e_uhf_ao_b = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                &
+ !$OMP PRIVATE (g, e, d, k, mu, nu, dm_ge_a, dm_ge_b, dm_ge, dm_dk_a, dm_dk_b, dm_dk, f_tmp,  &
+ !$OMP          i_mugd_nuek, i_mugd_eknu, i_mugd_knue, i_mugd_nuke, i_mugd_enuk, i_mugd_kenu) &
+ !$OMP SHARED  (ao_num, TCSCF_bi_ort_dm_ao_alpha, TCSCF_bi_ort_dm_ao_beta, fock_3e_uhf_ao_b)
+  ! f_tmp is PRIVATE: each thread allocates and fills its own copy, summed into the shared array under CRITICAL below
+  allocate(f_tmp(ao_num,ao_num))
+  f_tmp = 0.d0
+
+ !$OMP DO
+  do g = 1, ao_num
+    do e = 1, ao_num
+      dm_ge_a = TCSCF_bi_ort_dm_ao_alpha(g,e)
+      dm_ge_b = TCSCF_bi_ort_dm_ao_beta (g,e)
+      dm_ge   = dm_ge_a + dm_ge_b
+      do d = 1, ao_num
+        do k = 1, ao_num
+          dm_dk_a = TCSCF_bi_ort_dm_ao_alpha(d,k)
+          dm_dk_b = TCSCF_bi_ort_dm_ao_beta (d,k)
+          dm_dk   = dm_dk_a + dm_dk_b
+          do mu = 1, ao_num
+            do nu = 1, ao_num
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, e, k, i_mugd_nuek)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, k, nu, i_mugd_eknu)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, nu, e, i_mugd_knue)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, nu, k, e, i_mugd_nuke)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, e, nu, k, i_mugd_enuk)
+              call give_integrals_3_body_bi_ort_ao(mu, g, d, k, e, nu, i_mugd_kenu)
+              f_tmp(mu,nu) -= 0.5d0 * ( dm_ge   * dm_dk   * i_mugd_nuek &
+                                      + dm_ge_b * dm_dk_b * i_mugd_eknu &
+                                      + dm_ge_b * dm_dk_b * i_mugd_knue &
+                                      - dm_ge_b * dm_dk   * i_mugd_enuk &
+                                      - dm_ge   * dm_dk_b * i_mugd_kenu &
+                                      - dm_ge_b * dm_dk_b * i_mugd_nuke &
+                                      - dm_ge_a * dm_dk_a * i_mugd_nuke )
+            enddo
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do mu = 1, ao_num
+    do nu = 1, ao_num
+      fock_3e_uhf_ao_b(mu,nu) += f_tmp(mu,nu)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(f_tmp)
+ !$OMP END PARALLEL
+
+  call wall_time(tf)
+  print *, ' total Wall time for fock_3e_uhf_ao_b =', tf - ti
+
+END_PROVIDER 
+
+! ---
+
diff --git a/src/tc_scf/fock_tc.irp.f b/src/tc_scf/fock_tc.irp.f
index 6b1c1d77..7403049c 100644
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@@ -1,63 +1,147 @@
 
 ! ---
 
- BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
-&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
- BEGIN_DOC
-! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
-!
-! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
- END_DOC
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_seq_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
   implicit none
   integer          :: i, j, k, l
   double precision :: density, density_a, density_b
+  double precision :: t0, t1
 
-  two_e_tc_non_hermit_integral_alpha = 0.d0
-  two_e_tc_non_hermit_integral_beta  = 0.d0
+  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
+  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
 
-  !! TODO :: parallelization properly done
   do i = 1, ao_num
     do k = 1, ao_num
-!!$OMP PARALLEL                  &
-!!$OMP DEFAULT (NONE)            &
-!!$OMP PRIVATE (j,l,density_a,density_b,density) & 
-!!$OMP SHARED (i,k,ao_num,SCF_density_matrix_ao_alpha,SCF_density_matrix_ao_beta,ao_non_hermit_term_chemist) & 
-!!$OMP SHARED (two_e_tc_non_hermit_integral_alpha,two_e_tc_non_hermit_integral_beta)
-!!$OMP DO SCHEDULE (dynamic)
       do j = 1, ao_num
         do l = 1, ao_num
 
           density_a = TCSCF_density_matrix_ao_alpha(l,j)
           density_b = TCSCF_density_matrix_ao_beta (l,j)
-          density   = density_a + density_b                      
+          density   = density_a + density_b
+
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho(l,j)   *      < k l| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          !!                                         rho_a(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !!                                         rho_b(l,j) *      < l k| T | i j>
+          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
           !                                         rho(l,j)   *      < k l| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !                                         rho_a(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !                                         rho_b(l,j) *      < l k| T | i j>
-          two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
+          two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
+          !                                         rho_a(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
+          !                                         rho_b(l,j) *      < k l| T | j i>
+          two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
 
         enddo
       enddo
-!!$OMP END DO
-!!$OMP END PARALLEL
     enddo
   enddo
 
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0
+
+END_PROVIDER 
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_alpha, (ao_num, ao_num)]
+&BEGIN_PROVIDER [ double precision, two_e_tc_non_hermit_integral_beta , (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
+  !
+  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k, l
+  double precision              :: density, density_a, density_b, I_coul, I_kjli
+  double precision              :: t0, t1
+  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)
+
+  !print*, ' providing two_e_tc_non_hermit_integral ...'
+  !call wall_time(t0)
+
+  two_e_tc_non_hermit_integral_alpha = 0.d0
+  two_e_tc_non_hermit_integral_beta  = 0.d0
+
+ !$OMP PARALLEL DEFAULT (NONE)                                                                        &
+ !$OMP PRIVATE (i, j, k, l, density_a, density_b, density, tmp_a, tmp_b, I_coul, I_kjli)              &
+ !$OMP SHARED  (ao_num, TCSCF_density_matrix_ao_alpha, TCSCF_density_matrix_ao_beta, ao_two_e_tc_tot, &
+ !$OMP         two_e_tc_non_hermit_integral_alpha, two_e_tc_non_hermit_integral_beta)
+
+  allocate(tmp_a(ao_num,ao_num), tmp_b(ao_num,ao_num))
+  tmp_a = 0.d0
+  tmp_b = 0.d0
+
+ !$OMP DO
+  do j = 1, ao_num
+    do l = 1, ao_num
+      density_a = TCSCF_density_matrix_ao_alpha(l,j)
+      density_b = TCSCF_density_matrix_ao_beta (l,j)
+      density   = density_a + density_b                      
+      do i = 1, ao_num
+        do k = 1, ao_num
+
+          I_coul = density * ao_two_e_tc_tot(k,i,l,j)
+          I_kjli = ao_two_e_tc_tot(k,j,l,i)
+
+          tmp_a(k,i) += I_coul - density_a * I_kjli
+          tmp_b(k,i) += I_coul - density_b * I_kjli
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO NOWAIT
+
+ !$OMP CRITICAL
+  do i = 1, ao_num
+    do j = 1, ao_num
+      two_e_tc_non_hermit_integral_alpha(j,i) += tmp_a(j,i)
+      two_e_tc_non_hermit_integral_beta (j,i) += tmp_b(j,i)
+    enddo
+  enddo
+ !$OMP END CRITICAL
+
+  deallocate(tmp_a, tmp_b)
+ !$OMP END PARALLEL
+
+  !call wall_time(t1)
+  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0
+
 END_PROVIDER 
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_alpha, (ao_num, ao_num)]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
-  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot &
-                          + two_e_tc_non_hermit_integral_alpha 
+
+  implicit none
+
+  Fock_matrix_tc_ao_alpha =  ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_alpha 
 
 END_PROVIDER 
 
@@ -66,102 +150,149 @@ END_PROVIDER
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_beta, (ao_num, ao_num)]
 
   BEGIN_DOC
- ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
   END_DOC
+
   implicit none
 
-  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot &
-                         + two_e_tc_non_hermit_integral_beta 
+  Fock_matrix_tc_ao_beta = ao_one_e_integrals_tc_tot + two_e_tc_non_hermit_integral_beta 
 
 END_PROVIDER 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta TC Fock matrix : h_c + Two-e^TC terms on the AO basis
-!  END_DOC
-!  Fock_matrix_tc_ao_tot = 0.5d0 * (Fock_matrix_tc_ao_alpha + Fock_matrix_tc_ao_beta)
-!END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total alpha TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
-                         , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_alpha
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_a
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_alpha += fock_a_tot_3e_bi_orth
+      Fock_matrix_tc_mo_alpha += fock_3e_uhf_mo_a
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
+    call ao_to_mo( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                  , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
+
   endif
+
 END_PROVIDER
 
 ! ---
 
 BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]
-  implicit none
+
   BEGIN_DOC
- ! Total beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
+  ! Total beta TC Fock matrix : h_c + Two-e^TC terms on the MO basis
   END_DOC
-  if(bi_ortho)then
-   call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
-                         , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
-   if(three_body_h_tc)then
-    Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
-   endif
+
+  implicit none
+  double precision, allocatable :: tmp(:,:)
+
+  if(bi_ortho) then
+
+    !allocate(tmp(ao_num,ao_num))
+    !tmp = Fock_matrix_tc_ao_beta
+    !if(three_body_h_tc) then
+    !  tmp += fock_3e_uhf_ao_b
+    !endif
+    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
+    !deallocate(tmp)
+
+    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+    if(three_body_h_tc) then
+      !Fock_matrix_tc_mo_beta += fock_b_tot_3e_bi_orth
+      Fock_matrix_tc_mo_beta += fock_3e_uhf_mo_b
+    endif
+
   else
-   call ao_to_mo(  Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
+
+    call ao_to_mo( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                  , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
+
   endif
+
 END_PROVIDER
 
-! ---
-
-!BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_tot, (mo_num, mo_num)]
-!  implicit none
-!  BEGIN_DOC
-! ! Total alpha+beta  TC Fock matrix : h_c + Two-e^TC terms on the MO basis
-!  END_DOC
-!  Fock_matrix_tc_mo_tot = 0.5d0 * (Fock_matrix_tc_mo_alpha + Fock_matrix_tc_mo_beta)
-!  if(three_body_h_tc) then
-!    Fock_matrix_tc_mo_tot += fock_3_mat
-!  endif
-!  !call restore_symmetry(mo_num, mo_num, Fock_matrix_tc_mo_tot, mo_num, 1.d-10)
-!END_PROVIDER 
-
 ! ---
 
  BEGIN_PROVIDER [ double precision, grad_non_hermit_left]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit_right]
 &BEGIN_PROVIDER [ double precision, grad_non_hermit]
- implicit none
+
+  implicit none
   integer :: i, k
-  grad_non_hermit_left = 0.d0
+  ! left / right = largest |Fock_matrix_tc_mo_tot(k,i)| / |Fock_matrix_tc_mo_tot(i,k)| over the three (i,k) blocks below
+  grad_non_hermit_left  = 0.d0
   grad_non_hermit_right = 0.d0
+
   do i = 1, elec_beta_num ! doc --> SOMO
     do k = elec_beta_num+1, elec_alpha_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      !grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = 1, elec_beta_num ! doc --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      !grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
+
   do i = elec_beta_num+1, elec_alpha_num ! SOMO --> virt 
     do k = elec_alpha_num+1, mo_num
-      grad_non_hermit_left+= dabs(Fock_matrix_tc_mo_tot(k,i))
-      grad_non_hermit_right+= dabs(Fock_matrix_tc_mo_tot(i,k))
+      grad_non_hermit_left  = max(grad_non_hermit_left , dabs(Fock_matrix_tc_mo_tot(k,i)))
+      grad_non_hermit_right = max(grad_non_hermit_right, dabs(Fock_matrix_tc_mo_tot(i,k)))
+      !grad_non_hermit_left  += dabs(Fock_matrix_tc_mo_tot(k,i))
+      !grad_non_hermit_right += dabs(Fock_matrix_tc_mo_tot(i,k))
+      !grad_non_hermit_left  += Fock_matrix_tc_mo_tot(k,i) * Fock_matrix_tc_mo_tot(k,i)
+      !grad_non_hermit_right += Fock_matrix_tc_mo_tot(i,k) * Fock_matrix_tc_mo_tot(i,k)
     enddo
   enddo
- grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+  ! total measure = left max-norm + right max-norm (the sum-of-squares / dsqrt variant is kept commented out)
+  !grad_non_hermit = dsqrt(grad_non_hermit_left) + dsqrt(grad_non_hermit_right)
+  grad_non_hermit = grad_non_hermit_left + grad_non_hermit_right
+
 END_PROVIDER 
+
+! ---
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_tc_ao_tot, (ao_num, ao_num) ]
+  ! (ao_num, ao_num) array obtained by passing Fock_matrix_tc_mo_tot through mo_to_ao_bi_ortho
+  implicit none
+  ! leading dimensions of the input and output arrays are passed explicitly via size(.,1)
+  call mo_to_ao_bi_ortho( Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) &
+                        , Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) )
+
+END_PROVIDER
+
+! ---
+
+
diff --git a/src/tc_scf/fock_tc_mo_tot.irp.f b/src/tc_scf/fock_tc_mo_tot.irp.f
index a99c7698..2f33cd17 100644
--- a/src/tc_scf/fock_tc_mo_tot.irp.f
+++ b/src/tc_scf/fock_tc_mo_tot.irp.f
@@ -73,6 +73,29 @@
              + (Fock_matrix_tc_mo_beta(i,j) - Fock_matrix_tc_mo_alpha(i,j))
        enddo
      enddo
+     if(three_body_h_tc)then
+      ! C-O
+      do j = 1, elec_beta_num
+       do i = elec_beta_num+1, elec_alpha_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! C-V
+      do j = 1, elec_beta_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+      ! O-V
+      do j = elec_beta_num+1, elec_alpha_num
+       do i = elec_alpha_num+1, mo_num
+        Fock_matrix_tc_mo_tot(i,j) += 0.5d0*(fock_a_tot_3e_bi_orth(i,j) + fock_b_tot_3e_bi_orth(i,j))
+        Fock_matrix_tc_mo_tot(j,i) += 0.5d0*(fock_a_tot_3e_bi_orth(j,i) + fock_b_tot_3e_bi_orth(j,i))
+       enddo
+      enddo
+     endif
 
    endif
 
diff --git a/src/tc_scf/fock_three.irp.f b/src/tc_scf/fock_three.irp.f
index f73a5049..424eeffd 100644
--- a/src/tc_scf/fock_three.irp.f
+++ b/src/tc_scf/fock_three.irp.f
@@ -70,52 +70,76 @@ subroutine give_fock_ia_three_e_total(i,a,contrib)
 
 end
 
+! ---
+
 BEGIN_PROVIDER [double precision, diag_three_elem_hf]
- implicit none
- integer :: i,j,k,ipoint,mm
- double precision :: contrib,weight,four_third,one_third,two_third,exchange_int_231
- print*,'providing diag_three_elem_hf'
- if(.not.three_body_h_tc)then
-  diag_three_elem_hf = 0.d0
- else
-  if(.not.bi_ortho)then
-   one_third = 1.d0/3.d0
-   two_third = 2.d0/3.d0
-   four_third = 4.d0/3.d0
-   diag_three_elem_hf = 0.d0
-   do i = 1, elec_beta_num
-    do j = 1, elec_beta_num
-     do k = 1, elec_beta_num
-      call  give_integrals_3_body(k,j,i,j,i,k,exchange_int_231)   
-      diag_three_elem_hf += two_third * exchange_int_231
-     enddo
-    enddo
-   enddo
-   do mm = 1, 3
-    do ipoint = 1, n_points_final_grid
-     weight = final_weight_at_r_vector(ipoint)                                                                          
-     contrib   = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm)  & 
-                -2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                                 & 
-                -1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
-     contrib  *= four_third
-     contrib  += -two_third  * fock_3_rho_beta(ipoint)     * fock_3_w_kl_w_kl(ipoint,mm) & 
-                - four_third * fock_3_w_kk_sum(ipoint,mm)  * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
-     diag_three_elem_hf += weight * contrib
-    enddo
-   enddo
-   diag_three_elem_hf = - diag_three_elem_hf
+
+  implicit none
+  integer          :: i, j, k, ipoint, mm
+  double precision :: contrib, weight, four_third, one_third, two_third, exchange_int_231
+  double precision :: integral_aaa, hthree, integral_aab, integral_abb, integral_bbb
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  !print *, ' providing diag_three_elem_hf'
+
+  if(.not. three_body_h_tc) then
+
+    diag_three_elem_hf = 0.d0
+
   else
-   double precision :: integral_aaa,hthree, integral_aab,integral_abb,integral_bbb
-   provide mo_l_coef mo_r_coef
-   call give_aaa_contrib(integral_aaa)
-   call give_aab_contrib(integral_aab)
-   call give_abb_contrib(integral_abb)
-   call give_bbb_contrib(integral_bbb)
-   diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+
+    if(.not. bi_ortho) then
+
+      ! ---
+
+      one_third  = 1.d0/3.d0
+      two_third  = 2.d0/3.d0
+      four_third = 4.d0/3.d0
+      diag_three_elem_hf = 0.d0
+      do i = 1, elec_beta_num
+        do j = 1, elec_beta_num
+          do k = 1, elec_beta_num
+            call give_integrals_3_body(k, j, i, j, i, k,exchange_int_231)   
+            diag_three_elem_hf += two_third * exchange_int_231
+          enddo
+        enddo
+      enddo
+      do mm = 1, 3
+        do ipoint = 1, n_points_final_grid
+          weight  = final_weight_at_r_vector(ipoint)                                                                          
+          contrib = 3.d0 * fock_3_w_kk_sum(ipoint,mm) * fock_3_rho_beta(ipoint) * fock_3_w_kk_sum(ipoint,mm) & 
+                  - 2.d0 * fock_3_w_kl_mo_k_mo_l(ipoint,mm) * fock_3_w_kk_sum(ipoint,mm)                     & 
+                  - 1.d0 * fock_3_rho_beta(ipoint) * fock_3_w_kl_w_kl(ipoint,mm)
+          contrib *= four_third
+          contrib += -two_third  * fock_3_rho_beta(ipoint)    * fock_3_w_kl_w_kl(ipoint,mm) & 
+                     -four_third * fock_3_w_kk_sum(ipoint,mm) * fock_3_w_kl_mo_k_mo_l(ipoint,mm)
+          diag_three_elem_hf += weight * contrib
+       enddo
+      enddo
+
+      diag_three_elem_hf = - diag_three_elem_hf
+
+      ! ---
+
+    else
+
+      provide mo_l_coef mo_r_coef
+      call give_aaa_contrib(integral_aaa)
+      call give_aab_contrib(integral_aab)
+      call give_abb_contrib(integral_abb)
+      call give_bbb_contrib(integral_bbb)
+      diag_three_elem_hf = integral_aaa + integral_aab + integral_abb + integral_bbb
+!      print*,'integral_aaa + integral_aab + integral_abb + integral_bbb'
+!      print*,integral_aaa , integral_aab , integral_abb , integral_bbb
+
+    endif
+
   endif
- endif
+
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [ double precision, fock_3_mat_a_op_sh, (mo_num, mo_num)]
  implicit none 
diff --git a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
index b0345957..f73171a3 100644
--- a/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
+++ b/src/tc_scf/fock_three_bi_ortho_new_new.irp.f
@@ -1,202 +1,286 @@
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_a_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
-   fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tot_3e_bi_orth = 0.d0
+  ! fock_a_tot_3e_bi_orth(a,i) = fock_cs_3e_bi_orth(a,i) + fock_a_tmp1_bi_ortho(a,i) + fock_a_tmp2_bi_ortho(a,i)
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_a_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp1_bi_ortho(a,i)
+      fock_a_tot_3e_bi_orth(a,i) += fock_a_tmp2_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tot_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo,contrib
- fock_b_tot_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
-   fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+
+  implicit none
+  integer :: i, a
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tot_3e_bi_orth = 0.d0
+  ! fock_b_tot_3e_bi_orth(a,i) = fock_cs_3e_bi_orth(a,i) + fock_b_tmp2_bi_ortho(a,i) + fock_b_tmp1_bi_ortho(a,i)
+  do i = 1, mo_num
+    do a = 1, mo_num
+      fock_b_tot_3e_bi_orth(a,i) += fock_cs_3e_bi_orth  (a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp2_bi_ortho(a,i)
+      fock_b_tot_3e_bi_orth(a,i) += fock_b_tmp1_bi_ortho(a,i)
+    enddo
   enddo
- enddo
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_cs_3e_bi_orth, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_cs_3e_bi_orth = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = 1, elec_beta_num
-!      call contrib_3e_sss(a,i,j,k,contrib_sss)
-!      call contrib_3e_soo(a,i,j,k,contrib_soo)
-!      call contrib_3e_sos(a,i,j,k,contrib_sos)
-!      contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- ! negative terms :: exchange contrib
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      new  = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) & 
-             -1.5d0 * exch_13_int - exch_23_int
-      fock_cs_3e_bi_orth(a,i) += new
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_cs_3e_bi_orth = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = 1, elec_beta_num
+        do k = 1, elec_beta_num
+
+          !!call contrib_3e_sss(a,i,j,k,contrib_sss)
+          !!call contrib_3e_soo(a,i,j,k,contrib_soo)
+          !!call contrib_3e_sos(a,i,j,k,contrib_sos)
+          !!contrib = 0.5d0 * (contrib_sss + contrib_soo) + contrib_sos
+ 
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+
+          ! negative terms :: exchange contrib
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+
+          new = 2.d0 * direct_int + 0.5d0 * (c_3_int + c_minus_3_int - exch_12_int) -1.5d0 * exch_13_int - exch_23_int
+
+          fock_cs_3e_bi_orth(a,i) += new
+        enddo
+      enddo
     enddo
-   enddo
-  
   enddo
- enddo
- fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
+ 
+  fock_cs_3e_bi_orth = - fock_cs_3e_bi_orth
 
 END_PROVIDER 
 
+! ---
 
 BEGIN_PROVIDER [double precision, fock_a_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- double precision :: new
- fock_a_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_beta_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
-      call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
-      fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) &
-      + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss, contrib_sos, contrib_soo, contrib
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_a_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+    
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_beta_num
+          call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+          call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+          call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+          call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+          
+          fock_a_tmp1_bi_ortho(a,i) += 1.5d0 * (direct_int - exch_13_int) + 0.5d0 * (c_3_int + c_minus_3_int - exch_23_int - exch_12_int)
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
+  fock_a_tmp1_bi_ortho = - fock_a_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_a_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_sss
- fock_a_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = 1, elec_alpha_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call contrib_3e_sss(a,i,j,k,contrib_sss)
-      fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_sss
+
+  PROVIDE mo_l_coef mo_r_coef
+ 
+  fock_a_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_alpha_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call contrib_3e_sss(a, i, j, k, contrib_sss)
+
+          fock_a_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_sss
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-
-
-
+! ---
 
 BEGIN_PROVIDER [double precision, fock_b_tmp1_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
- double precision :: new
- fock_b_tmp1_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   
-   do j = 1, elec_beta_num
-    do k = elec_beta_num+1, elec_alpha_num
-      call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
-      call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
-      call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
-      fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
-    enddo
-   enddo
 
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int
+  double precision :: new
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp1_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = 1, elec_beta_num
+        do k = elec_beta_num+1, elec_alpha_num
+          call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+          call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+          call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+
+          fock_b_tmp1_bi_ortho(a,i) += 1.5d0 * direct_int - 0.5d0 * exch_23_int - exch_13_int
+        enddo
+      enddo
+    enddo
   enddo
- enddo
- fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
+  fock_b_tmp1_bi_ortho = - fock_b_tmp1_bi_ortho
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [double precision, fock_b_tmp2_bi_ortho, (mo_num, mo_num)]
- implicit none
- integer :: i,a,j,k
- double precision :: contrib_soo
- fock_b_tmp2_bi_ortho = 0.d0
- do i = 1, mo_num
-  do a = 1, mo_num
-   do j = elec_beta_num + 1, elec_alpha_num 
-    do k = 1, elec_alpha_num
-      call contrib_3e_soo(a,i,j,k,contrib_soo)
-      fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+
+  implicit none
+  integer          :: i, a, j, k
+  double precision :: contrib_soo
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  fock_b_tmp2_bi_ortho = 0.d0
+
+  do i = 1, mo_num
+    do a = 1, mo_num
+      do j = elec_beta_num + 1, elec_alpha_num 
+        do k = 1, elec_alpha_num
+          call contrib_3e_soo(a, i, j, k, contrib_soo)
+
+          fock_b_tmp2_bi_ortho(a,i) += 0.5d0 * contrib_soo
+        enddo
+      enddo
     enddo
-   enddo
   enddo
- enddo
+
 END_PROVIDER 
 
-subroutine contrib_3e_sss(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the pure same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
- call  give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
- integral = direct_int + c_3_int + c_minus_3_int 
- ! negative terms :: exchange contrib
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
- call  give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
- integral += - exch_13_int - exch_23_int  - exch_12_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sss(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the pure same spin contribution to F(a,i) from two orbitals j,k : integral = -(direct + c_3 + c_minus_3 - E_13 - E_23 - E_12)
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int, exch_23_int, exch_12_int, c_3_int, c_minus_3_int
+
+  PROVIDE mo_l_coef mo_r_coef
+  ! positive terms :: direct integral and the two cyclic permutations of (i,k,j) on the ket side
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )!!! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, i, k, c_3_int)      ! < a k j | j i k >
+  call give_integrals_3_body_bi_ort(a, k, j, k, j, i, c_minus_3_int)! < a k j | k j i >
+  integral = direct_int + c_3_int + c_minus_3_int 
+
+  ! negative terms :: exchange contrib
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)!!! < a k j | j k i > : E_13 
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)!!! < a k j | i j k > : E_23
+  call give_integrals_3_body_bi_ort(a, k, j, k, i, j, exch_12_int)!!! < a k j | k i j > : E_12
+  integral += - exch_13_int - exch_23_int  - exch_12_int 
+  ! the accumulated combination is returned with the opposite sign
+  integral = -integral
+
 end
 
+! ---
+
 subroutine contrib_3e_soo(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_23_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
- integral = direct_int - exch_23_int 
- integral = -integral
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / opposite spin contribution to F(a,i) from two orbitals j,k : integral = -(direct - E_23)
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_23_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int) ! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, i, j, k, exch_23_int)! < a k j | i j k > : E_23
+  integral = direct_int - exch_23_int 
+  ! the direct-minus-exchange combination is returned with the opposite sign
+  integral = -integral
+
 end
 
-subroutine contrib_3e_sos(a,i,j,k,integral)
- implicit none
- integer, intent(in) :: a,i,j,k
- BEGIN_DOC
- ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k
- END_DOC
- double precision, intent(out) :: integral
- double precision :: direct_int, exch_13_int
- call  give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
- call  give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
- integral = direct_int - exch_13_int 
- integral = -integral
+! ---
+
+subroutine contrib_3e_sos(a, i, j, k, integral)
+
+  BEGIN_DOC
+  ! returns the same spin / opposite spin / same spin contribution to F(a,i) from two orbitals j,k : integral = -(direct - E_13)
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: a, i, j, k
+  double precision, intent(out) :: integral
+  double precision              :: direct_int, exch_13_int
+
+  PROVIDE mo_l_coef mo_r_coef
+
+  call give_integrals_3_body_bi_ort(a, k, j, i, k, j, direct_int )! < a k j | i k j >
+  call give_integrals_3_body_bi_ort(a, k, j, j, k, i, exch_13_int)! < a k j | j k i > : E_13 
+  integral = direct_int - exch_13_int 
+  ! the direct-minus-exchange combination is returned with the opposite sign
+  integral = -integral
+
 end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_diis.irp.f b/src/tc_scf/rh_tcscf_diis.irp.f
new file mode 100644
index 00000000..306c78b3
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_diis.irp.f
@@ -0,0 +1,362 @@
+! ---
+
+subroutine rh_tcscf_diis()
+  ! Restricted TC-SCF loop: DIIS extrapolation of the AO Fock matrix plus an adaptive level shift
+  implicit none
+
+  integer                       :: i, j, it
+  integer                       :: dim_DIIS, index_dim_DIIS
+  double precision              :: etc_tot, etc_1e, etc_2e, etc_3e, e_save, e_delta
+  double precision              :: tc_grad, g_save, g_delta, g_delta_th
+  double precision              :: level_shift_save, rate_th
+  double precision              :: t0, t1
+  double precision              :: er_DIIS, er_delta, er_save, er_delta_th
+  double precision, allocatable :: F_DIIS(:,:,:), E_DIIS(:,:,:)
+  double precision, allocatable :: mo_r_coef_save(:,:), mo_l_coef_save(:,:)
+
+  logical, external             :: qp_stop
+  ! starting values; g_delta_th and er_delta_th are overwritten at the end of each iteration
+  it          = 0
+  e_save      = 0.d0
+  dim_DIIS    = 0
+  g_delta_th  = 1d0
+  er_delta_th = 1d0
+  rate_th     = 100.d0 !0.01d0 !0.2d0
+  ! buffers holding a copy of the MOs inside the step-control loop
+  allocate(mo_r_coef_save(ao_num,mo_num), mo_l_coef_save(ao_num,mo_num))
+  mo_l_coef_save = 0.d0
+  mo_r_coef_save = 0.d0
+  ! DIIS history: AO Fock matrices (F_DIIS) and AO error matrices FQS_SQF_ao (E_DIIS)
+  allocate(F_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF), E_DIIS(ao_num,ao_num,max_dim_DIIS_TCSCF))
+  F_DIIS = 0.d0
+  E_DIIS = 0.d0
+
+  call write_time(6)
+
+  ! ---
+
+  PROVIDE level_shift_TCSCF
+  PROVIDE mo_l_coef mo_r_coef
+  ! header of the iteration table
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+          , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+  write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+    '====', '================', '================', '================', '================', '================' &
+          , '================', '================', '================', '====', '========'
+
+
+  ! first iteration (HF orbitals)
+  call wall_time(t0)
+
+  etc_tot = TC_HF_energy
+  etc_1e  = TC_HF_one_e_energy
+  etc_2e  = TC_HF_two_e_energy
+  etc_3e  = 0.d0
+  if(three_body_h_tc) then
+    etc_3e = diag_three_elem_hf
+  endif
+  tc_grad = grad_non_hermit
+  er_DIIS = maxval(abs(FQS_SQF_mo))
+  e_delta = dabs(etc_tot - e_save)
+
+  e_save  = etc_tot
+  g_save  = tc_grad
+  er_save = er_DIIS
+
+  call wall_time(t1)
+  write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+    it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+  ! ---
+
+  PROVIDE FQS_SQF_ao Fock_matrix_tc_ao_tot
+  ! keep iterating while both the gradient and the DIIS error are above their thresholds
+  do while((tc_grad .gt. dsqrt(thresh_tcscf)) .and. (er_DIIS .gt. threshold_DIIS_nonzero_TCSCF))
+
+    call wall_time(t0)
+
+    it += 1
+    if(it > n_it_TCSCF_max) then
+      print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+      stop
+    endif
+
+    dim_DIIS = min(dim_DIIS+1, max_dim_DIIS_TCSCF)
+
+    ! ---
+    ! store the current AO Fock / error matrices and extrapolate, unless the energy did not move
+    if(dabs(e_delta) > 1.d-12) then
+      ! slot of F_DIIS / E_DIIS written at this iteration
+      index_dim_DIIS = mod(dim_DIIS-1, max_dim_DIIS_TCSCF) + 1
+      do j = 1, ao_num
+        do i = 1, ao_num
+          F_DIIS(i,j,index_dim_DIIS) = Fock_matrix_tc_ao_tot(i,j)
+          E_DIIS(i,j,index_dim_DIIS) = FQS_SQF_ao           (i,j)
+        enddo
+      enddo
+      ! overwrite Fock_matrix_tc_ao_tot with the DIIS extrapolation (dim_DIIS may come back as 0), then rebuild the MO-basis matrix
+      call extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1), it, dim_DIIS)
+
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+    endif
+
+    ! ---
+    ! take fock_tc_leigvec_ao / fock_tc_reigvec_ao as the new left / right MOs
+    mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+    mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+    !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+    !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    TOUCH mo_l_coef mo_r_coef
+
+    ! ---
+    ! variation of the gradient and of the DIIS error since the previous iteration
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+    ! gradient increased by more than rate_th * g_delta_th: reload the last Fock matrix stored in F_DIIS
+    !if((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1)) then
+    if((g_delta > rate_th * g_delta_th) .and. (it > 1)) then
+    !if((g_delta > 0.d0) .and. (it > 1)) then
+
+      Fock_matrix_tc_ao_tot(1:ao_num,1:ao_num) = F_DIIS(1:ao_num,1:ao_num,index_dim_DIIS)
+      call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_tot, size(Fock_matrix_tc_ao_tot, 1) &
+                            , Fock_matrix_tc_mo_tot, size(Fock_matrix_tc_mo_tot, 1) )
+      TOUCH Fock_matrix_tc_mo_tot fock_matrix_tc_diag_mo_tot
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+    endif
+
+    ! ---
+    ! re-evaluate the variations and back up the current MOs
+    g_delta  = grad_non_hermit         -  g_save
+    er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+
+    mo_l_coef_save(1:ao_num,1:mo_num) = mo_l_coef(1:ao_num,1:mo_num)
+    mo_r_coef_save(1:ao_num,1:mo_num) = mo_r_coef(1:ao_num,1:mo_num)
+    ! while the step is still rejected: raise the level shift and recompute the MOs
+    !do while((g_delta > rate_th * g_delta_th) .and. (er_delta > rate_th * er_delta_th) .and. (it > 1))
+    do while((g_delta > rate_th * g_delta_th) .and. (it > 1))
+      print *, ' big or bad step : ', g_delta, rate_th * g_delta_th
+
+      mo_l_coef(1:ao_num,1:mo_num) = mo_l_coef_save(1:ao_num,1:mo_num) 
+      mo_r_coef(1:ao_num,1:mo_num) = mo_r_coef_save(1:ao_num,1:mo_num) 
+      if(level_shift_TCSCF <= .1d0) then
+        level_shift_TCSCF = 1.d0
+      else
+        level_shift_TCSCF = level_shift_TCSCF * 3.0d0
+      endif
+      TOUCH mo_l_coef mo_r_coef level_shift_TCSCF
+
+      mo_l_coef(1:ao_num,1:mo_num) = fock_tc_leigvec_ao(1:ao_num,1:mo_num)
+      mo_r_coef(1:ao_num,1:mo_num) = fock_tc_reigvec_ao(1:ao_num,1:mo_num)
+      !call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      !call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      g_delta  = grad_non_hermit         -  g_save
+      er_delta = maxval(abs(FQS_SQF_mo)) - er_save
+      ! stop raising the level shift once it exceeds the saved value by more than 40
+      if(level_shift_TCSCF - level_shift_save > 40.d0) then
+        level_shift_TCSCF = level_shift_save * 4.d0
+        SOFT_TOUCH level_shift_TCSCF
+        exit
+      endif
+      ! drop the DIIS history
+      dim_DIIS = 0
+    enddo
+
+    ! ---
+    ! halve the level shift before the next iteration
+    level_shift_TCSCF = level_shift_TCSCF * 0.5d0
+    SOFT_TOUCH level_shift_TCSCF
+    ! energies, gradient and DIIS error after the update
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad  = grad_non_hermit
+    er_DIIS  = maxval(abs(FQS_SQF_mo))
+    e_delta  = dabs(etc_tot - e_save)
+    g_delta  = tc_grad - g_save
+    er_delta = er_DIIS - er_save
+    
+    e_save           = etc_tot
+    g_save           = tc_grad
+    level_shift_save = level_shift_TCSCF
+    er_save          = er_DIIS
+    ! thresholds of the step-rejection test used at the next iteration
+    g_delta_th  = dabs(tc_grad) ! g_delta)
+    er_delta_th = dabs(er_DIIS) !er_delta)
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    ! save energy and MOs to EZFIO only if the gradient decreased
+    if(g_delta .lt. 0.d0) then
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+    endif
+    ! leave the loop if qp_stop() returns .true.
+    if(qp_stop()) exit
+  enddo
+
+  ! ---
+  ! NOTE(review): printed on every exit of the loop, including an exit through qp_stop()
+  print *, ' TCSCF DIIS converged !'
+  call print_energy_and_mos()
+
+  call write_time(6)
+
+  deallocate(mo_r_coef_save, mo_l_coef_save, F_DIIS, E_DIIS)
+  ! final save of the energy and MOs to EZFIO
+  call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
+  call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+
+end
+
+! ---
+
+subroutine extrapolate_TC_Fock_matrix(E_DIIS, F_DIIS, F_ao, size_F_ao, it, dim_DIIS)
+
+  BEGIN_DOC
+  !
+  ! Compute the extrapolated Fock matrix using the DIIS procedure
+  !
+  ! e = \sum_i c_i e_i and \sum_i c_i = 1
+  ! ==> lagrange multiplier with L = |e|^2 - \lambda (\sum_i c_i - 1)
+  !
+  ! E_DIIS, F_DIIS : stored error and Fock matrices (AO basis)
+  ! F_ao           : overwritten by \sum_k c_k F_k when the DIIS system can be solved
+  ! dim_DIIS       : number of stored matrices; reset to 0 (F_ao untouched) when the
+  !                  DIIS matrix cannot be factorized or has rcond < 1d-14
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)    :: it, size_F_ao
+  integer,          intent(inout) :: dim_DIIS
+  double precision, intent(in)    :: F_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(in)    :: E_DIIS(ao_num,ao_num,dim_DIIS)
+  double precision, intent(inout) :: F_ao(size_F_ao,ao_num)
+
+  integer                         :: i, j, k, l, i_DIIS, j_DIIS
+  integer                         :: lwork, info
+  double precision                :: rcond, anorm
+  integer,          allocatable   :: ipiv(:), iwork(:)
+  double precision, allocatable   :: B_matrix_DIIS(:,:), X_vector_DIIS(:), C_vector_DIIS(:)
+  double precision, allocatable   :: AF(:,:), scratch(:,:)
+  double precision, external      :: dlange
+
+  if(dim_DIIS < 1) then
+    return
+  endif
+
+  allocate( B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), X_vector_DIIS(dim_DIIS+1) &
+          , C_vector_DIIS(dim_DIIS+1) )
+
+  ! B(i,j) = sum over AO pairs of e_i * e_j for the stored error matrices
+  B_matrix_DIIS(:,:) = 0.d0
+  do j = 1, dim_DIIS
+    j_DIIS = min(dim_DIIS, mod(it-j, max_dim_DIIS_TCSCF)+1)
+
+    do i = 1, dim_DIIS
+      i_DIIS = min(dim_DIIS, mod(it-i, max_dim_DIIS_TCSCF)+1)
+
+      ! Compute product of two errors vectors
+      do l = 1, ao_num
+        do k = 1, ao_num
+          B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + E_DIIS(k,l,i_DIIS) * E_DIIS(k,l,j_DIIS)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+  ! Pad B with the constraint row / column and build the right-hand side C
+
+  C_vector_DIIS(:) = 0.d0
+  do i = 1, dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+  enddo
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+  ! Estimate condition number of B (1-norm, from its LU factorization)
+
+  lwork = max((dim_DIIS+1)**2, (dim_DIIS+1)*5)
+  allocate(AF(dim_DIIS+1,dim_DIIS+1))
+  allocate(ipiv(2*(dim_DIIS+1)), iwork(2*(dim_DIIS+1)) )
+  allocate(scratch(lwork,1))
+  scratch(:,1) = 0.d0
+
+  anorm = dlange('1', dim_DIIS+1, dim_DIIS+1, B_matrix_DIIS, size(B_matrix_DIIS, 1), scratch(1,1))
+
+  AF(:,:) = B_matrix_DIIS(:,:)
+  call dgetrf(dim_DIIS+1, dim_DIIS+1, AF, size(AF, 1), ipiv, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  call dgecon('1', dim_DIIS+1, AF, size(AF, 1), anorm, rcond, scratch, iwork, info)
+  if(info /= 0) then
+    dim_DIIS = 0
+    return
+  endif
+
+  if(rcond < 1.d-14) then
+    dim_DIIS = 0
+    return
+  endif
+
+  ! solve the linear system C = B x X
+
+  X_vector_DIIS = C_vector_DIIS
+  call dgesv(dim_DIIS+1, 1, B_matrix_DIIS, size(B_matrix_DIIS, 1), ipiv , X_vector_DIIS, size(X_vector_DIIS, 1), info)
+
+  deallocate(scratch, AF, iwork, ipiv)
+  if(info < 0) then
+    stop ' bug in TC-DIIS'
+  endif
+
+  ! Compute extrapolated Fock matrix
+
+  !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED) if (ao_num > 200)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      F_ao(i,j) = 0.d0
+    enddo
+    do k = 1, dim_DIIS
+      if(dabs(X_vector_DIIS(k)) < 1.d-10) cycle
+      do i = 1,ao_num
+        ! FPE here
+        F_ao(i,j) = F_ao(i,j) + X_vector_DIIS(k) * F_DIIS(i,j,dim_DIIS-k+1)
+      enddo
+    enddo
+  enddo
+  !$OMP END PARALLEL DO
+
+  deallocate(B_matrix_DIIS, X_vector_DIIS, C_vector_DIIS)
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rh_tcscf_simple.irp.f b/src/tc_scf/rh_tcscf_simple.irp.f
new file mode 100644
index 00000000..30798e3d
--- /dev/null
+++ b/src/tc_scf/rh_tcscf_simple.irp.f
@@ -0,0 +1,129 @@
+! ---
+
+subroutine rh_tcscf_simple()
+
+  BEGIN_DOC
+  ! Plain TC-SCF loop without DIIS: at each iteration the MOs are replaced by the
+  ! Fock eigenvectors until the gradient falls below dsqrt(thresh_tcscf)
+  END_DOC
+
+  implicit none
+  integer          :: it, dim_DIIS
+  double precision :: t0, t1
+  double precision :: e_save, e_delta
+  double precision :: etc_tot, etc_1e, etc_2e, etc_3e, tc_grad
+  double precision :: er_DIIS
+
+  it       = 0
+  e_save   = 0.d0
+  dim_DIIS = 0
+
+  ! ---
+
+  if(.not. bi_ortho) then
+   print *, ' grad_hermit = ', grad_hermit
+   call save_good_hermit_tc_eigvectors
+   TOUCH mo_coef
+   call save_mos
+  endif
+
+  ! ---
+
+  if(bi_ortho) then
+
+    PROVIDE level_shift_tcscf
+    PROVIDE mo_l_coef mo_r_coef
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      ' it ', '  SCF TC Energy ', '      E(1e)     ', '      E(2e)     ', '      E(3e)     ', '   energy diff  ' &
+            , '    gradient    ', '    DIIS error  ', '  level shift   ', 'DIIS', '  WT (m)'
+
+    write(6, '(A4,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A16,1X, A4, 1X, A8)')              &
+      '====', '================', '================', '================', '================', '================' &
+            , '================', '================', '================', '====', '========'
+
+
+    ! first iteration (HF orbitals)
+    call wall_time(t0)
+
+    etc_tot = TC_HF_energy
+    etc_1e  = TC_HF_one_e_energy
+    etc_2e  = TC_HF_two_e_energy
+    etc_3e  = 0.d0
+    if(three_body_h_tc) then
+      etc_3e = diag_three_elem_hf
+    endif
+    tc_grad = grad_non_hermit
+    er_DIIS = maxval(abs(FQS_SQF_mo))
+    e_delta = dabs(etc_tot - e_save)
+    e_save  = etc_tot
+
+    call wall_time(t1)
+    write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+      it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+
+    do while(tc_grad .gt. dsqrt(thresh_tcscf))
+      call wall_time(t0)
+
+      it += 1
+      if(it > n_it_tcscf_max) then
+        print *, ' max of TCSCF iterations is reached ', n_it_TCSCF_max
+        stop
+      endif
+
+      mo_l_coef = fock_tc_leigvec_ao
+      mo_r_coef = fock_tc_reigvec_ao
+      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
+      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
+      TOUCH mo_l_coef mo_r_coef
+
+      etc_tot = TC_HF_energy
+      etc_1e  = TC_HF_one_e_energy
+      etc_2e  = TC_HF_two_e_energy
+      etc_3e  = 0.d0
+      if(three_body_h_tc) then
+        etc_3e = diag_three_elem_hf
+      endif
+      tc_grad = grad_non_hermit
+      er_DIIS = maxval(abs(FQS_SQF_mo))
+      e_delta = dabs(etc_tot - e_save)
+      e_save  = etc_tot
+
+      call ezfio_set_tc_scf_bitc_energy(etc_tot)
+
+      call wall_time(t1)
+      write(6, '(I4,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, F16.10,1X, I4,1X, F8.2)')  &
+        it, etc_tot, etc_1e, etc_2e, etc_3e, e_delta, tc_grad, er_DIIS, level_shift_tcscf, dim_DIIS, (t1-t0)/60.d0
+    enddo
+
+  else
+
+   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. (it.lt.n_it_tcscf_max) )
+      print*,'grad_hermit = ',grad_hermit
+      it += 1
+      print *, 'iteration = ', it
+      print *, '***'
+      print *, 'TC HF total energy = ', TC_HF_energy
+      print *, 'TC HF 1 e   energy = ', TC_HF_one_e_energy
+      print *, 'TC HF 2 e   energy = ', TC_HF_two_e_energy
+      print *, 'TC HF 3 body       = ', diag_three_elem_hf
+      print *, '***'
+      print *, ''
+      call save_good_hermit_tc_eigvectors
+      TOUCH mo_coef
+      call save_mos
+    enddo
+
+  endif
+
+  print *, ' TCSCF Simple converged !'
+  call print_energy_and_mos()
+
+end
+
+! ---
+
diff --git a/src/tc_scf/rotate_tcscf_orbitals.irp.f b/src/tc_scf/rotate_tcscf_orbitals.irp.f
index d53991ed..fc4a7935 100644
--- a/src/tc_scf/rotate_tcscf_orbitals.irp.f
+++ b/src/tc_scf/rotate_tcscf_orbitals.irp.f
@@ -260,14 +260,10 @@ subroutine fix_right_to_one()
   integer                       :: i, j, m, n, mm, tot_deg
   double precision              :: accu_d, accu_nd
   double precision              :: de_thr, ei, ej, de
-  double precision              :: thr_d, thr_nd
   integer,          allocatable :: deg_num(:)
   double precision, allocatable :: R0(:,:), L0(:,:), W(:,:), e0(:)
   double precision, allocatable :: R(:,:), L(:,:), S(:,:), Stmp(:,:), tmp(:,:)
 
-  thr_d  = 1d-7
-  thr_nd = 1d-7
-
   n = ao_num
   m = mo_num
 
@@ -340,7 +336,7 @@ subroutine fix_right_to_one()
       ! ---
 
       call impose_weighted_orthog_svd(n, mm, W, R)
-      call impose_weighted_biorthog_qr(n, mm, thr_d, thr_nd, R, W, L)
+      call impose_weighted_biorthog_qr(n, mm, thresh_biorthog_diag, thresh_biorthog_nondiag, R, W, L)
 
       ! ---
 
@@ -353,7 +349,7 @@ subroutine fix_right_to_one()
     endif
   enddo
 
-  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thr_d, thr_nd, .true.)
+  call check_weighted_biorthog_binormalize(n, m, L0, W, R0, thresh_biorthog_diag, thresh_biorthog_nondiag, .true.)
 
   deallocate(W, deg_num)
 
diff --git a/src/tc_scf/routines_rotates.irp.f b/src/tc_scf/routines_rotates.irp.f
index 42925e41..596ae500 100644
--- a/src/tc_scf/routines_rotates.irp.f
+++ b/src/tc_scf/routines_rotates.irp.f
@@ -116,7 +116,7 @@ subroutine routine_save_rotated_mos(thr_deg, good_angles)
     print *, ' ------------------------------------'
     call orthog_functions(ao_num, n_degen, mo_l_coef_tmp, ao_overlap)
 
-    print *, ' Overlap lef-right '
+    print *, ' Overlap left-right '
     call build_s_matrix(ao_num, n_degen, mo_r_coef_tmp, mo_l_coef_tmp, ao_overlap, stmp)
     do j = 1, n_degen
      write(*,'(100(F8.4,X))') stmp(:,j)
@@ -259,7 +259,7 @@ subroutine orthog_functions(m, n, coef, overlap)
   double precision, intent(in)    :: overlap(m,m)
   double precision, intent(inout) :: coef(m,n)
   double precision, allocatable   :: stmp(:,:)
-  integer                         :: j
+  integer                         :: j, k
 
   allocate(stmp(n,n))
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
@@ -270,7 +270,13 @@ subroutine orthog_functions(m, n, coef, overlap)
   call impose_orthog_svd_overlap(m, n, coef, overlap)
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
   do j = 1, n
-    coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    ! ---
+    ! TODO: MANU check here (normalize column j of coef, not row 1)
+    !coef(1,:m) *= 1.d0/dsqrt(stmp(j,j))
+    do k = 1, m
+      coef(k,j) *= 1.d0/dsqrt(stmp(j,j))
+    enddo
+    ! ---
   enddo
   call build_s_matrix(m, n, coef, coef, overlap, stmp)
 
diff --git a/src/tc_scf/tc_scf.irp.f b/src/tc_scf/tc_scf.irp.f
index 48cbbdc0..187750ff 100644
--- a/src/tc_scf/tc_scf.irp.f
+++ b/src/tc_scf/tc_scf.irp.f
@@ -1,7 +1,9 @@
+! ---
+
 program tc_scf
 
   BEGIN_DOC
-! TODO : Put the documentation of the program here
+  ! TODO : Put the documentation of the program here
   END_DOC
 
   implicit none
@@ -15,43 +17,51 @@ program tc_scf
 !  my_n_pt_a_grid = 26 ! small grid for quick debug
   touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-  !call create_guess
-  !call orthonormalize_mos
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  !call create_guess()
+  !call orthonormalize_mos()
+
+  PROVIDE tcscf_algorithm
+  if(tcscf_algorithm == 'DIIS') then
+    call rh_tcscf_diis()
+  elseif(tcscf_algorithm == 'Simple') then
+    call rh_tcscf_simple()
+  else
+    print *, ' not implemented yet', tcscf_algorithm
+    stop
+  endif
 
-  call routine_scf()
   call minimize_tc_orb_angles()
   call print_energy_and_mos()
 
-
 end
 
 ! ---
 
-subroutine create_guess
-
-  BEGIN_DOC
-  !   Create a MO guess if no MOs are present in the EZFIO directory
-  END_DOC
+subroutine create_guess()
 
   implicit none
   logical :: exists
 
   PROVIDE ezfio_filename
-  call ezfio_has_mo_basis_mo_coef(exists)
+  !call ezfio_has_mo_basis_mo_coef(exists)
+  exists = .false.
 
-  if (.not.exists) then
+  if(.not.exists) then
     mo_label = 'Guess'
-    if (mo_guess_type == "HCore") then
+    if(mo_guess_type == "HCore") then
       mo_coef = ao_ortho_lowdin_coef
       call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       TOUCH mo_coef
-      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals,     &
-          size(mo_one_e_integrals,1),                            &
-          size(mo_one_e_integrals,2),                            &
-          mo_label,1,.false.)
-      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef,1), 1.d-10)
+      call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, size(mo_one_e_integrals, 1), size(mo_one_e_integrals, 2), mo_label, 1, .false.)
+      call restore_symmetry(ao_num, mo_num, mo_coef, size(mo_coef, 1), 1.d-10)
       SOFT_TOUCH mo_coef
-    else if (mo_guess_type == "Huckel") then
+    elseif (mo_guess_type == "Huckel") then
       call huckel_guess
     else
       print *,  'Unrecognized MO guess type : '//mo_guess_type
@@ -64,121 +74,3 @@ end subroutine create_guess
 
 ! ---
 
-subroutine routine_scf()
-
-  implicit none
-  integer                       :: i, j, it
-  double precision              :: e_save, e_delta, rho_delta
-  double precision, allocatable :: rho_old(:,:), rho_new(:,:)
-
-  allocate(rho_old(ao_num,ao_num), rho_new(ao_num,ao_num))
-
-  it = 0
-  print*,'iteration = ', it
-
-  !print*,'grad_hermit = ', grad_hermit
-  print*,'***'
-  print*,'TC HF total energy = ', TC_HF_energy
-  print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-  print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-  if(three_body_h_tc)then
-   print*,'TC HF 3 body       = ', diag_three_elem_hf
-  endif
-  print*,'***'
-  e_delta = 10.d0
-  e_save  = 0.d0 !TC_HF_energy
-  rho_delta = 10.d0
-
-
-  if(bi_ortho)then
-
-   mo_l_coef = fock_tc_leigvec_ao
-   mo_r_coef = fock_tc_reigvec_ao
-   rho_old   = TCSCF_bi_ort_dm_ao
-   call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-   call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-   TOUCH mo_l_coef mo_r_coef
-
-
-  else
-
-   print*,'grad_hermit = ',grad_hermit
-   call save_good_hermit_tc_eigvectors
-   TOUCH mo_coef 
-   call save_mos
-
-  endif
-
-  ! ---
-
-  if(bi_ortho) then
-
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. dsqrt(thresh_tcscf)) )
-    !do while( it .lt. n_it_tcscf_max .and. (e_delta .gt. thresh_tcscf) )
-    !do while( it .lt. n_it_tcscf_max .and. (rho_delta .gt. thresh_tcscf) )
-    do while( it .lt. n_it_tcscf_max .and. (grad_non_hermit_right.gt. dsqrt(thresh_tcscf)) )
-
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 non hermit = ', TC_HF_two_e_energy
-      if(three_body_h_tc)then
-       print*,'TC HF 3 body       = ', diag_three_elem_hf
-      endif
-      print*,'***'
-      e_delta = dabs( TC_HF_energy - e_save )
-      print*, 'it, delta E = ', it, e_delta
-      print*, 'it, gradient= ',grad_non_hermit_right
-      e_save    = TC_HF_energy
-      mo_l_coef = fock_tc_leigvec_ao
-      mo_r_coef = fock_tc_reigvec_ao
-
-      rho_new   = TCSCF_bi_ort_dm_ao
-      !print*, rho_new
-      rho_delta = 0.d0
-      do i = 1, ao_num 
-        do j = 1, ao_num 
-          rho_delta += dabs(rho_new(j,i) - rho_old(j,i))
-        enddo
-      enddo
-      print*, ' rho_delta =', rho_delta
-      rho_old = rho_new
-
-      call ezfio_set_bi_ortho_mos_mo_l_coef(mo_l_coef)
-      call ezfio_set_bi_ortho_mos_mo_r_coef(mo_r_coef)
-      TOUCH mo_l_coef mo_r_coef
-
-      call ezfio_set_tc_scf_bitc_energy(TC_HF_energy)
-
-    enddo
-
-  else
-   do while( (grad_hermit.gt.dsqrt(thresh_tcscf)) .and. it .lt. n_it_tcscf_max )
-      print*,'grad_hermit = ',grad_hermit
-      it += 1
-      print*,'iteration = ', it
-      print*,'***'
-      print*,'TC HF total energy = ', TC_HF_energy
-      print*,'TC HF 1 e   energy = ', TC_HF_one_electron_energy
-      print*,'TC HF 2 e   energy = ', TC_HF_two_e_energy
-      print*,'TC HF 3 body       = ', diag_three_elem_hf
-      print*,'***'
-      call save_good_hermit_tc_eigvectors
-      TOUCH mo_coef 
-      call save_mos
-
-    enddo
-
-  endif
-
-  print*,'Energy converged !'
-  call print_energy_and_mos
-
-  deallocate(rho_old, rho_new)
-
-end subroutine routine_scf
-
-! ---
-
diff --git a/src/tc_scf/tc_scf_dm.irp.f b/src/tc_scf/tc_scf_dm.irp.f
index f6ae3e1f..4750199c 100644
--- a/src/tc_scf/tc_scf_dm.irp.f
+++ b/src/tc_scf/tc_scf_dm.irp.f
@@ -1,25 +1,39 @@
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_beta, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_beta  = TCSCF_bi_ort_dm_ao_beta
- else
-  TCSCF_density_matrix_ao_beta  = SCF_density_matrix_ao_beta
- endif
+
+  implicit none
+  ! beta AO density matrix: TCSCF_bi_ort_dm_ao_beta if bi_ortho, SCF_density_matrix_ao_beta otherwise
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_beta = TCSCF_bi_ort_dm_ao_beta
+  else
+    TCSCF_density_matrix_ao_beta = SCF_density_matrix_ao_beta
+  endif
+
 END_PROVIDER 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_alpha, (ao_num, ao_num) ]
- implicit none
- if(bi_ortho)then
-  TCSCF_density_matrix_ao_alpha  = TCSCF_bi_ort_dm_ao_alpha
- else
-  TCSCF_density_matrix_ao_alpha  = SCF_density_matrix_ao_alpha
- endif
+
+  implicit none
+  ! alpha AO density matrix: TCSCF_bi_ort_dm_ao_alpha if bi_ortho, SCF_density_matrix_ao_alpha otherwise
+  if(bi_ortho) then
+    PROVIDE mo_l_coef mo_r_coef
+    TCSCF_density_matrix_ao_alpha = TCSCF_bi_ort_dm_ao_alpha
+  else
+    TCSCF_density_matrix_ao_alpha = SCF_density_matrix_ao_alpha
+  endif
+
 END_PROVIDER 
 
 
+! ---
+
 BEGIN_PROVIDER [ double precision, TCSCF_density_matrix_ao_tot, (ao_num, ao_num) ]
- implicit none
-  TCSCF_density_matrix_ao_tot  = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
+  implicit none
+  TCSCF_density_matrix_ao_tot = TCSCF_density_matrix_ao_beta + TCSCF_density_matrix_ao_alpha
 END_PROVIDER 
 
 
diff --git a/src/tc_scf/tc_scf_energy.irp.f b/src/tc_scf/tc_scf_energy.irp.f
index aa2a16ff..611b8b4c 100644
--- a/src/tc_scf/tc_scf_energy.irp.f
+++ b/src/tc_scf/tc_scf_energy.irp.f
@@ -1,6 +1,6 @@
 
  BEGIN_PROVIDER [ double precision, TC_HF_energy]
-&BEGIN_PROVIDER [ double precision, TC_HF_one_electron_energy]
+&BEGIN_PROVIDER [ double precision, TC_HF_one_e_energy]
 &BEGIN_PROVIDER [ double precision, TC_HF_two_e_energy]
 
   BEGIN_DOC
@@ -10,20 +10,22 @@
   implicit none
   integer :: i, j
 
+  PROVIDE mo_l_coef mo_r_coef
+
   TC_HF_energy = nuclear_repulsion
-  TC_HF_one_electron_energy = 0.d0
+  TC_HF_one_e_energy = 0.d0
   TC_HF_two_e_energy = 0.d0
 
   do j = 1, ao_num
     do i = 1, ao_num
       TC_HF_two_e_energy += 0.5d0 * ( two_e_tc_non_hermit_integral_alpha(i,j) * TCSCF_density_matrix_ao_alpha(i,j) &
-                                    + two_e_tc_non_hermit_integral_beta(i,j)  * TCSCF_density_matrix_ao_beta(i,j) )
-      TC_HF_one_electron_energy += ao_one_e_integrals_tc_tot(i,j) &
-                                       * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
+                                    + two_e_tc_non_hermit_integral_beta (i,j) * TCSCF_density_matrix_ao_beta (i,j) )
+      TC_HF_one_e_energy += ao_one_e_integrals_tc_tot(i,j) &
+                          * (TCSCF_density_matrix_ao_alpha(i,j) + TCSCF_density_matrix_ao_beta (i,j) )
     enddo
   enddo
 
-  TC_HF_energy += TC_HF_one_electron_energy + TC_HF_two_e_energy
+  TC_HF_energy += TC_HF_one_e_energy + TC_HF_two_e_energy
   TC_HF_energy += diag_three_elem_hf
 
 END_PROVIDER
diff --git a/src/tc_scf/tc_scf_utils.irp.f b/src/tc_scf/tc_scf_utils.irp.f
index 09a4a1b9..dde477c4 100644
--- a/src/tc_scf/tc_scf_utils.irp.f
+++ b/src/tc_scf/tc_scf_utils.irp.f
@@ -40,3 +40,4 @@ subroutine LTxSxR(n, m, L, S, R, C)
 end subroutine LTxR
 
 ! ---
+
diff --git a/src/tc_scf/test_int.irp.f b/src/tc_scf/test_int.irp.f
new file mode 100644
index 00000000..6abeddf1
--- /dev/null
+++ b/src/tc_scf/test_int.irp.f
@@ -0,0 +1,1008 @@
+program test_ints
+
+  BEGIN_DOC
+! Test driver: sets the my_*_grid and my_*_extra_grid sizes, then runs the check that is currently enabled (test_two_e_tc_non_hermit_integral).
+  END_DOC
+
+  implicit none
+
+  print *, ' starting test_ints ...'
+
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 15 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  my_extra_grid_becke = .True.
+  my_n_pt_r_extra_grid = 30
+  my_n_pt_a_extra_grid = 50 ! same values as the main grid above (30 / 50)
+  touch my_extra_grid_becke my_n_pt_r_extra_grid my_n_pt_a_extra_grid
+
+!! OK 
+!call routine_int2_u_grad1u_j1b2 
+!! OK
+!call routine_v_ij_erf_rk_cst_mu_j1b
+!! OK 
+! call routine_x_v_ij_erf_rk_cst_mu_j1b
+!! OK
+! call routine_v_ij_u_cst_mu_j1b
+
+!! OK
+!call routine_int2_u2_j1b2
+
+!! OK
+!call routine_int2_u_grad1u_x_j1b2
+
+!! OK 
+! call routine_int2_grad1u2_grad2u2_j1b2
+! call routine_int2_u_grad1u_j1b2
+! call test_total_grad_lapl
+! call test_total_grad_square
+! call test_ao_tc_int_chemist
+! call test_grid_points_ao
+! call test_tc_scf
+ !call test_int_gauss
+
+  !call test_fock_3e_uhf_ao()
+  !call test_fock_3e_uhf_mo()
+
+  !call test_tc_grad_and_lapl_ao()
+  !call test_tc_grad_square_ao()
+
+  call test_two_e_tc_non_hermit_integral()
+
+end
+
+! ---
+
+subroutine test_tc_scf
+ implicit none
+ integer :: i
+! provide int2_u_grad1u_x_j1b2_test
+ provide x_v_ij_erf_rk_cst_mu_j1b_test
+! do i = 1, ng_fit_jast
+!  print*,expo_gauss_1_erf_x_2(i),coef_gauss_1_erf_x_2(i)
+! enddo
+! provide tc_grad_square_ao_test
+!  provide tc_grad_and_lapl_ao_test
+! provide int2_u_grad1u_x_j1b2_test
+! provide x_v_ij_erf_rk_cst_mu_j1b_test
+! print*,'TC_HF_energy = ',TC_HF_energy
+! print*,'grad_non_hermit = ',grad_non_hermit
+end
+
+subroutine test_ao_tc_int_chemist
+ implicit none
+ provide ao_tc_int_chemist
+! provide ao_tc_int_chemist_test
+! provide tc_grad_square_ao_test
+! provide tc_grad_and_lapl_ao_test
+end
+
+! ---
+
+subroutine routine_test_j1b
+ implicit none
+ integer :: i,icount,j
+ icount = 0
+ do i = 1, List_all_comb_b3_size
+  if(dabs(List_all_comb_b3_coef(i)).gt.1.d-10)then
+   print*,''
+   print*,List_all_comb_b3_expo(i),List_all_comb_b3_coef(i)
+   print*,List_all_comb_b3_cent(1:3,i)
+   print*,''
+   icount += 1
+  endif
+  
+ enddo
+ print*,'List_all_comb_b3_coef,icount = ',List_all_comb_b3_size,icount
+ do i = 1, ao_num
+  do j = 1, ao_num
+   do icount = 1, List_comb_thr_b3_size(j,i)
+    print*,'',j,i
+    print*,List_comb_thr_b3_expo(icount,j,i),List_comb_thr_b3_coef(icount,j,i)
+    print*,List_comb_thr_b3_cent(1:3,icount,j,i)
+    print*,''
+   enddo
+!   enddo
+  enddo
+ enddo
+ print*,'max_List_comb_thr_b3_size = ',max_List_comb_thr_b3_size,List_all_comb_b3_size
+
+end
+
+subroutine routine_int2_u_grad1u_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u_grad1u_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u_grad1u_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_erf_rk_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_x_v_ij_erf_rk_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += x_v_ij_erf_rk_cst_mu_j1b_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += x_v_ij_erf_rk_cst_mu_j1b     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+
+subroutine routine_v_ij_u_cst_mu_j1b_test
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_grad1u2_grad2u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ integer :: ii , jj
+ double precision :: weight,accu_relat, accu_abs, contrib, e_ref, e_new
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! Purpose: contract int2_grad1u2_grad2u2_j1b2_test and the reference
+! int2_grad1u2_grad2u2_j1b2 with AO products on the grid, then print e_ref / e_new.
+! The reference used to come from an `ints` buffer whose fill loop (a read from
+! unit 33) was disabled, so e_ref was built from memory that was never set.
+! That buffer is no longer allocated. To restore a file-based reference:
+! declare and allocate ints(ao_num, ao_num, n_points_final_grid), fill it with
+!    read(33,*)ints(j,i,ipoint)
+! inside loops over ipoint, i, j (j innermost), and use it for array_ref below.
+! No input arguments; results are only printed.
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      ! reference: same contraction with the non-_test provider
+      array_ref(j,i,l,k) += int2_grad1u2_grad2u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!     file-based alternative (needs ints declared, allocated and filled, see header):
+!     array_ref(j,i,l,k) += ints(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+!
+!      if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!       if(dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint)).gt.1.d-6)then
+!        print*,j,i,ipoint
+!        print*,int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) , int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint) - int2_grad1u2_grad2u2_j1b2_test(j,i,ipoint))
+!        print*,int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) , int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint), dabs(int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint) - int2_grad1u2_grad2u2_j1b2_test(i,j,ipoint))
+!        stop
+!       endif
+!      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ ! e_ref / e_new: both integral sets contracted with mo_coef over pairs ii <= jj <= elec_alpha_num
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+ e_ref = 0.d0
+ e_new = 0.d0
+ do ii = 1, elec_alpha_num
+  do jj = ii, elec_alpha_num
+   do k = 1, ao_num
+    do l = 1, ao_num
+     do i = 1, ao_num
+      do j = 1, ao_num
+       e_ref += mo_coef(j,ii) * mo_coef(i,ii) * array_ref(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       e_new += mo_coef(j,ii) * mo_coef(i,ii) * array(j,i,l,k) * mo_coef(l,jj) * mo_coef(k,jj)
+       contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+       accu_abs += contrib
+!       if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+!        accu_relat += contrib/dabs(array_ref(j,i,l,k))
+!       endif
+      enddo
+     enddo
+    enddo
+   enddo
+
+  enddo
+ enddo
+ print*,'e_ref = ',e_ref
+ print*,'e_new = ',e_new
+! print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+! print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_int2_u2_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += int2_u2_j1b2_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += int2_u2_j1b2(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+
+subroutine routine_int2_u_grad1u_x_j1b2
+ implicit none
+ integer :: i,j,ipoint,k,l,m
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+! print*,'ao_overlap_abs = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_overlap_abs(i,:)
+! enddo
+! print*,'center = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_center(2,i,:)
+! enddo
+! print*,'sigma = '
+! do i = 1, ao_num
+!   write(*,'(100(F10.5,X))')ao_prod_sigma(i,:)
+! enddo
+
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      do m = 1, 3
+       array(j,i,l,k)     += int2_u_grad1u_x_j1b2_test(j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+       array_ref(j,i,l,k) += int2_u_grad1u_x_j1b2     (j,i,ipoint,m) * aos_grad_in_r_array_transp(m,k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      enddo
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+  
+
+end
+
+subroutine routine_v_ij_u_cst_mu_j1b
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ double precision, allocatable :: array(:,:,:,:), array_ref(:,:,:,:)
+
+ allocate(array(ao_num, ao_num, ao_num, ao_num))
+ array = 0.d0
+ allocate(array_ref(ao_num, ao_num, ao_num, ao_num))
+ array_ref = 0.d0
+ do ipoint = 1, n_points_final_grid
+  weight = final_weight_at_r_vector(ipoint)
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      array(j,i,l,k)     += v_ij_u_cst_mu_j1b_test(j,i,ipoint) * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+      array_ref(j,i,l,k) += v_ij_u_cst_mu_j1b(j,i,ipoint)      * aos_in_r_array(k,ipoint) * aos_in_r_array(l,ipoint) * weight
+     enddo
+    enddo
+   enddo
+  enddo
+ enddo
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(array(j,i,l,k) - array_ref(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(array_ref(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(array_ref(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+end
+
+! ---
+
+subroutine test_fock_3e_uhf_ao()
+  ! Compares fock_3e_uhf_ao_{a,b}, passed through ao_to_mo_bi_ortho, with fock_{a,b}_tot_3e_bi_orth.
+  implicit none
+  integer                       :: i, j
+  double precision              :: diff_tot, diff_ij, thr_ih, norm
+  double precision, allocatable :: fock_3e_uhf_ao_a_mo(:,:), fock_3e_uhf_ao_b_mo(:,:)
+
+  thr_ih = 1d-7
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_ao_a fock_3e_uhf_ao_b
+
+  ! --- F_a
+
+  allocate(fock_3e_uhf_ao_a_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_a   , size(fock_3e_uhf_ao_a   , 1) &
+                        , fock_3e_uhf_ao_a_mo, size(fock_3e_uhf_ao_a_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      ! every element deviating by more than thr_ih is printed; the run continues (stop is disabled)
+      diff_ij = dabs(fock_3e_uhf_ao_a_mo(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_a_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_a_mo)
+
+  ! --- F_b
+
+  allocate(fock_3e_uhf_ao_b_mo(mo_num,mo_num))
+  call ao_to_mo_bi_ortho( fock_3e_uhf_ao_b   , size(fock_3e_uhf_ao_b   , 1) &
+                        , fock_3e_uhf_ao_b_mo, size(fock_3e_uhf_ao_b_mo, 1) )
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+      ! same element-wise report as for F_a
+      diff_ij = dabs(fock_3e_uhf_ao_b_mo(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_ao_b_mo  (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, ' '
+
+  deallocate(fock_3e_uhf_ao_b_mo)
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_ao
+
+! ---
+
+subroutine test_fock_3e_uhf_mo()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff_ij, thr_ih, norm
+
+  thr_ih = 1d-12
+
+  PROVIDE fock_a_tot_3e_bi_orth fock_b_tot_3e_bi_orth
+  PROVIDE fock_3e_uhf_mo_a fock_3e_uhf_mo_b
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_a(j,i) - fock_a_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_a_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_a     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_a_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_a = ', diff_tot / norm
+  print *, '      norm_a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, mo_num
+    do j = 1, mo_num
+
+      diff_ij = dabs(fock_3e_uhf_mo_b(j,i) - fock_b_tot_3e_bi_orth(j,i))
+      if(diff_ij .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' MANU : ', fock_b_tot_3e_bi_orth(j,i)
+        print *, ' UHF  : ', fock_3e_uhf_mo_b     (j,i)
+        !stop
+      endif
+
+      norm     += dabs(fock_b_tot_3e_bi_orth(j,i))
+      diff_tot += diff_ij
+    enddo
+  enddo
+  print *, ' diff on F_b = ', diff_tot/norm
+  print *, '      norm_b = ', norm
+  print *, ' '
+
+  ! ---
+
+end subroutine test_fock_3e_uhf_mo
+
+! ---
+
+subroutine test_total_grad_lapl
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_and_lapl_ao_test(j,i,l,k) - tc_grad_and_lapl_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_and_lapl_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_and_lapl_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_total_grad_square
+ implicit none
+ integer :: i,j,ipoint,k,l
+ double precision :: weight,accu_relat, accu_abs, contrib
+ accu_relat = 0.d0
+ accu_abs   = 0.d0
+  do k = 1, ao_num
+   do l = 1, ao_num
+    do i = 1, ao_num
+     do j = 1, ao_num
+      contrib = dabs(tc_grad_square_ao_test(j,i,l,k) - tc_grad_square_ao(j,i,l,k))
+      accu_abs += contrib
+      if(dabs(tc_grad_square_ao(j,i,l,k)).gt.1.d-10)then
+       accu_relat += contrib/dabs(tc_grad_square_ao(j,i,l,k))
+      endif
+     enddo
+    enddo
+   enddo
+  enddo
+ print*,'accu_abs   = ',accu_abs/dble(ao_num)**4
+ print*,'accu_relat = ',accu_relat/dble(ao_num)**4
+
+
+end
+
+subroutine test_grid_points_ao
+ implicit none
+ integer :: i,j,ipoint,icount,icount_good, icount_bad,icount_full
+ double precision :: thr
+ thr = 1.d-10
+! print*,'max_n_pts_grid_ao_prod = ',max_n_pts_grid_ao_prod
+! print*,'n_pts_grid_ao_prod'
+ do i = 1, ao_num
+  do j = i, ao_num
+  icount = 0
+  icount_good = 0
+  icount_bad = 0
+  icount_full = 0
+  do ipoint = 1, n_points_final_grid
+!   if(dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,1)) & 
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,2)) &
+!    + dabs(int2_u_grad1u_x_j1b2_test(j,i,ipoint,3)) )
+!   if(dabs(int2_u2_j1b2_test(j,i,ipoint)).gt.thr)then
+!    icount += 1
+!   endif
+   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_full += 1
+   endif
+   if(dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)).gt.thr)then
+    icount += 1
+    if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr*0.1d0)then
+    icount_good += 1
+    else
+    print*,j,i,ipoint
+    print*,dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)),dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint))/dabs(v_ij_u_cst_mu_j1b_test(j,i,ipoint))
+    icount_bad  += 1
+    endif
+   endif
+!   if(dabs(v_ij_u_cst_mu_j1b_ng_1_test(j,i,ipoint)).gt.thr)then
+!   endif
+  enddo
+   print*,''
+   print*,j,i
+   print*,icount,icount_full, icount_bad!,n_pts_grid_ao_prod(j,i)
+   print*,dble(icount)/dble(n_points_final_grid),dble(icount_full)/dble(n_points_final_grid)
+!          dble(n_pts_grid_ao_prod(j,i))/dble(n_points_final_grid)
+!   if(icount.gt.n_pts_grid_ao_prod(j,i))then
+!    print*,'pb !!'
+!   endif
+  enddo
+ enddo
+end
+
+subroutine test_int_gauss
+ implicit none ! prints AO-pair data, then compares two grid quadratures with closed-form values
+ integer :: i,j
+ print*,'center'
+ do i = 1, ao_num
+  do j = i, ao_num
+   print*,j,i
+   print*,ao_prod_sigma(j,i),ao_overlap_abs_grid(j,i)
+   print*,ao_prod_center(1:3,j,i)
+  enddo
+ enddo
+ print*,''
+ double precision :: weight, r(3),integral_1,pi,center(3),f_r,alpha,distance,integral_2
+ center = 0.d0
+ pi = dacos(-1.d0)
+ integral_1 = 0.d0
+ integral_2 = 0.d0
+ alpha = 0.75d0
+ do i = 1,  n_points_final_grid
+  ! you get x, y and z of the ith grid point
+  r(1) = final_grid_points(1,i)
+  r(2) = final_grid_points(2,i)
+  r(3) = final_grid_points(3,i)
+  weight = final_weight_at_r_vector(i)
+  distance = dsqrt( (r(1) - center(1))**2 +  (r(2) - center(2))**2 + (r(3) - center(3))**2 )
+  f_r = dexp(-alpha * distance*distance)
+  ! you add the contribution of the grid point to the integral
+  integral_1 += f_r * weight
+  integral_2 += f_r * distance * weight
+ enddo
+ print*,'integral_1      =',integral_1
+ print*,'(pi/alpha)**1.5 =',(pi / alpha)**1.5d0
+ print*,'integral_2      =',integral_2
+ print*,'2*pi/alpha**2   =',2.d0*pi / (alpha)**2
+ ! closed forms: int exp(-alpha r^2) d3r = (pi/alpha)^(3/2) ; int r exp(-alpha r^2) d3r = 2 pi / alpha^2
+
+end
+
+! ---
+
+subroutine test_tc_grad_and_lapl_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_and_lapl_ao tc_grad_and_lapl_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_and_lapl_ao_loop(l,k,j,i) - tc_grad_and_lapl_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_and_lapl_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_and_lapl_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_and_lapl_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_tc_grad_square_ao()
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE tc_grad_square_ao tc_grad_square_ao_loop
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          diff = dabs(tc_grad_square_ao_loop(l,k,j,i) - tc_grad_square_ao(l,k,j,i))
+          if(diff .gt. thr_ih) then
+            print *, ' difference on ', l, k, j, i
+            print *, ' loops : ', tc_grad_square_ao_loop(l,k,j,i)
+            print *, ' lapack: ', tc_grad_square_ao     (l,k,j,i)
+            !stop
+          endif
+
+          norm     += dabs(tc_grad_square_ao_loop(l,k,j,i))
+          diff_tot += diff
+        enddo
+      enddo
+    enddo
+  enddo
+
+  print *, ' diff tot = ', diff_tot / norm
+  print *, '     norm = ', norm
+  print *, ' '
+
+  return
+
+end
+
+! ---
+
+subroutine test_two_e_tc_non_hermit_integral()
+
+  implicit none
+  integer          :: i, j
+  double precision :: diff_tot, diff, thr_ih, norm
+
+  thr_ih = 1d-10
+
+  PROVIDE two_e_tc_non_hermit_integral_beta two_e_tc_non_hermit_integral_alpha
+  PROVIDE two_e_tc_non_hermit_integral_seq_beta two_e_tc_non_hermit_integral_seq_alpha
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i) - two_e_tc_non_hermit_integral_alpha(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_alpha(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_alpha    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_alpha(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot a = ', diff_tot / norm
+  print *, '     norm a = ', norm
+  print *, ' '
+
+  ! ---
+
+  norm     = 0.d0
+  diff_tot = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+
+      diff = dabs(two_e_tc_non_hermit_integral_seq_beta(j,i) - two_e_tc_non_hermit_integral_beta(j,i))
+      if(diff .gt. thr_ih) then
+        print *, ' difference on ', j, i
+        print *, ' seq         : ', two_e_tc_non_hermit_integral_seq_beta(j,i)
+        print *, ' //          : ', two_e_tc_non_hermit_integral_beta    (j,i)
+        !stop
+      endif
+
+      norm     += dabs(two_e_tc_non_hermit_integral_seq_beta(j,i))
+      diff_tot += diff
+    enddo
+  enddo
+
+  print *, ' diff tot b = ', diff_tot / norm
+  print *, '     norm b = ', norm
+  print *, ' '
+
+  ! ---
+
+  return
+
+end
+
+! ---
+
diff --git a/src/tools/print_he_energy.irp.f b/src/tools/print_he_energy.irp.f
index 87488fba..8daa2b8b 100644
--- a/src/tools/print_he_energy.irp.f
+++ b/src/tools/print_he_energy.irp.f
@@ -7,8 +7,8 @@ program print_he_energy
 
   call print_overlap()
 
-  call print_energy1()
-  call print_energy2()
+  !call print_energy1()
+  !call print_energy2()
 
 end 
 

From 0f9b2dbfe48f821ec9f67bf0008488d93a0ca2de Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Tue, 17 Jan 2023 01:54:58 +0100
Subject: [PATCH 29/68] fast restore symmetry

---
 src/utils/linear_algebra.irp.f | 126 +++++++++++++++++++++++++++++++--
 1 file changed, 122 insertions(+), 4 deletions(-)

diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 1e33c7dc..9517766e 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1690,6 +1690,19 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
   thresh2 = dsqrt(thresh)
   call nullify_small_elements(m,n,A,LDA,thresh)
 
+  ! Debug
+  !double precision, allocatable :: B(:,:)
+  !double precision :: max_diff, ti,tf
+  !allocate(B(m,n))
+  !B = A
+  !call wall_time(ti)
+  !call restore_symmetry_fast(m,n,B,LDA,thresh)
+  !call wall_time(tf)
+  !print*,''
+  !print*,'Restore_symmetry'
+  !print*,'Fast version:',tf-ti,'s'
+  !call wall_time(ti)
+
 !  if (.not.restore_symm) then
 !    return
 !  endif
@@ -1749,23 +1762,128 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
     enddo
   enddo
 
+  ! Debug
+  !call wall_time(tf)
+  !print*,'Old version:',tf-ti,'s'
+
+  !max_diff = 0d0
+  !do j = 1, n
+  !  do i = 1, n
+  !    if (dabs(A(i,j)-B(i,j)) > max_diff) then
+  !      max_diff = dabs(A(i,j)-B(i,j))
+  !    endif
+  !  enddo
+  !enddo
+  !print*,'Max diff:', max_diff
+  !deallocate(B)
+
 end
 
+subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
 
+  implicit none
 
+  BEGIN_DOC
+  ! Sorts the absolute values of the matrix elements, groups neighbouring values that are
+  ! close (tolerance built from sqrt(thresh)) and sets each group to its average, keeping signs.
+  ! NOTE(review): restore_symm is not read in this routine; the averaging always runs.
+  END_DOC
 
+  integer, intent(in) :: m,n,LDA
+  double precision, intent(inout) :: A(LDA,n)
+  double precision, intent(in) :: thresh
 
+  double precision, allocatable :: copy(:), copy_sign(:)
+  integer, allocatable :: key(:)
+  integer :: sze, pi, pf, idx, i,j,k
+  double precision :: average, val, thresh2
 
+  thresh2 = dsqrt(thresh)
+  call nullify_small_elements(m,n,A,LDA,thresh)
+ 
+  sze = m * n
 
+  allocate(copy(sze),copy_sign(sze),key(sze))
+  
+  ! Copy to 1D
+  !$OMP PARALLEL &
+  !$OMP SHARED(A,m,n,sze,copy_sign,copy,key) &
+  !$OMP PRIVATE(i,j,k) &
+  !$OMP DEFAULT(NONE)
+  !$OMP DO
+  do j = 1, n
+    do i = 1, m
+      copy(i+(j-1)*m) = A(i,j)
+    enddo
+  enddo
+  !$OMP END DO
+  ! Copy sign
+  !$OMP DO
+  do i = 1,sze
+    copy_sign(i) = sign(1d0,copy(i))
+    copy(i) = dabs(copy(i))
+  enddo
+  !$OMP END DO NOWAIT
+  ! Keys
+  !$OMP DO
+  do i = 1, sze
+   key(i) = i
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
 
+  ! Sort
+  call dsort(copy,key,sze)
 
+  ! Symmetrize
+  i = 1
+  do while(i < sze)
+    pi = i
+    pf = i
+    val = copy(i)
+    do while (dabs(val - copy(pf+1)) < thresh2)
+      pf = pf + 1
+      ! if pf == sze, copy(pf+1) will not be valid
+      if (pf == sze) then
+        exit
+      endif
+    enddo
+    ! if pi and pf are different do the average from pi to pf
+    if (pf - pi > 0) then
+      average = 0d0
+      do j = pi, pf
+        average = average + copy(j)
+      enddo
+      average = average / (pf-pi+1)
+      do j = pi, pf
+        copy(j) = average
+      enddo
+      ! Update i
+      i = pf
+    endif
+    ! Update i
+    i = i + 1
+  enddo
 
+  !$OMP PARALLEL &
+  !$OMP SHARED(m,sze,copy_sign,copy,key,A) &
+  !$OMP PRIVATE(i,j,k,idx) &
+  !$OMP DEFAULT(NONE)
+  ! copy -> A
+  !$OMP DO
+  do k = 1, sze
+      idx = key(k)
+      i = mod(idx-1,m) + 1
+      j = (idx-1) / m + 1
+      ! New value with the right sign
+      A(i,j) = sign(copy(k),copy_sign(idx))
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
 
+  deallocate(copy,copy_sign,key)
 
-
-
-
-
+end
 
 !subroutine svd_s(A, LDA, U, LDU, D, Vt, LDVt, m, n)
 !  implicit none

From 17add36bdac7e0093ba993ced16e62d30cb4d7b7 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Tue, 17 Jan 2023 09:49:14 +0100
Subject: [PATCH 30/68] remove error, replace symmetry condition

---
 src/utils/linear_algebra.irp.f | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 9517766e..20599325 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1835,13 +1835,21 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
   ! Sort
   call dsort(copy,key,sze)
 
+  !TODO 
+  ! Parallelization with OMP
+
+  ! Jump all the elements below thresh
+  i = 1 
+  do while (copy(i) <= thresh) 
+    i = i + 1
+  enddo
+
   ! Symmetrize
-  i = 1
   do while(i < sze)
     pi = i
     pf = i
-    val = copy(i)
-    do while (dabs(val - copy(pf+1)) < thresh2)
+    val = 1d0/copy(i)
+    do while (dabs(val * copy(pf+1) - 1d0) < thresh2)
       pf = pf + 1
       ! if pf == sze, copy(pf+1) will not be valid
       if (pf == sze) then

From 3d15420fb93c6becd64f1b6b34036ba319ea77e7 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 17 Jan 2023 11:12:54 +0200
Subject: [PATCH 31/68] Configuration for LUMI

---
 config/cray_gfortran.cfg            | 68 +++++++++++++++++++++++++++++
 config/{cray.cfg => cray_intel.cfg} |  0
 configure                           |  4 +-
 3 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 config/cray_gfortran.cfg
 rename config/{cray.cfg => cray_intel.cfg} (100%)

diff --git a/config/cray_gfortran.cfg b/config/cray_gfortran.cfg
new file mode 100644
index 00000000..1d1013b7
--- /dev/null
+++ b/config/cray_gfortran.cfg
@@ -0,0 +1,68 @@
+# On LUMI
+# 
+# export SPACK_USER_PREFIX=$HOME/spack
+# module swap PrgEnv-cray/8.3.3 PrgEnv-gnu/8.3.3
+# module load spack/22.08
+# module load openblas/0.3.17-gcc-omp-xi
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lopenblas              : Link with the OpenBLAS library (spack installation, see LAPACK_LIB below)
+# -I .                    : Include the current directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -ffree-line-length-none -I . -mavx -g -fPIC
+LAPACK_LIB   : -L/appl/lumi/spack/22.08/0.18.1/opt/spack/openblas-0.3.17-xinceno/lib -lopenblas
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 -DSET_NESTED 
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+# 
+[OPTION]
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast -march=native
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC       : -p -g
+FCFLAGS  : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Enables all run-time checks (array bounds, pointers, memory allocation, etc...)
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -fcheck=all -g
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/config/cray.cfg b/config/cray_intel.cfg
similarity index 100%
rename from config/cray.cfg
rename to config/cray_intel.cfg
diff --git a/configure b/configure
index 79cd7119..852082e7 100755
--- a/configure
+++ b/configure
@@ -99,7 +99,7 @@ PACKAGES=$(echo $PACKAGES | xargs)
 
 echo "export QP_ROOT=\"$QP_ROOT\"" > ${QP_ROOT}/etc/00.qp_root.rc
 
-source quantum_package.rc
+source ${QP_ROOT}/quantum_package.rc
 
 
 
@@ -293,7 +293,7 @@ EOF
 
 done
 
-source quantum_package.rc
+source ${QP_ROOT}/quantum_package.rc
 
 NINJA=$(find_exe ninja)
 if [[ ${NINJA} = $(not_found) ]] ; then

From 2f937cbca4b8df1d663cf61e01329b08eddd4128 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 17 Jan 2023 13:07:50 +0100
Subject: [PATCH 32/68] Use Yann's restore_symmetry

---
 src/determinants/slater_rules.irp.f |  21 +++-
 src/utils/linear_algebra.irp.f      | 175 +++++-----------------------
 2 files changed, 52 insertions(+), 144 deletions(-)

diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index b9710fd1..9bab9213 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -3,8 +3,27 @@ subroutine get_excitation_degree(key1,key2,degree,Nint)
   include 'utils/constants.include.F'
   implicit none
   BEGIN_DOC
-  ! Returns the excitation degree between two determinants.
+  ! This function calculates the excitation degree between two
+  ! determinants, which is half the number of bits that are different between the two
+  ! determinants. The function takes four arguments: 
+  !
+  !  * key1: An integer array of length Nint*2, representing the first determinant.
+  !
+  !  * key2: An integer array of length Nint*2, representing the second determinant.
+  !
+  !  * degree: An integer, passed by reference, that will store the calculated excitation degree.
+  !
+  !  * Nint: An integer representing the number of integers in each of the key1 and key2 arrays.
+  ! 
+  ! It starts a select case block that depends on the value of Nint. 
+  ! In each case, the function first calculates the bitwise XOR of each
+  ! corresponding pair of elements in key1 and key2, storing the results in the
+  ! xorvec array. It then calculates the number of bits set (using the popcnt
+  ! function) for each element in xorvec, and sums these counts up. This sum is
+  ! stored in the degree variable.  
+  ! Finally, the degree variable is right-shifted by 1 bit to divide the result by 2.
   END_DOC
+
   integer, intent(in)            :: Nint
   integer(bit_kind), intent(in)  :: key1(Nint*2)
   integer(bit_kind), intent(in)  :: key2(Nint*2)
diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index 20599325..c8a03e65 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1675,111 +1675,6 @@ subroutine nullify_small_elements(m,n,A,LDA,thresh)
 end
 
 subroutine restore_symmetry(m,n,A,LDA,thresh)
-  implicit none
-  BEGIN_DOC
-! Tries to find the matrix elements that are the same, and sets them
-! to the average value.
-! If restore_symm is False, only nullify small elements
-  END_DOC
-  integer, intent(in) :: m,n,LDA
-  double precision, intent(inout) :: A(LDA,n)
-  double precision, intent(in) :: thresh
-  integer :: i,j,k,l
-  logical, allocatable :: done(:,:)
-  double precision :: f, g, count, thresh2
-  thresh2 = dsqrt(thresh)
-  call nullify_small_elements(m,n,A,LDA,thresh)
-
-  ! Debug
-  !double precision, allocatable :: B(:,:)
-  !double precision :: max_diff, ti,tf
-  !allocate(B(m,n))
-  !B = A
-  !call wall_time(ti)
-  !call restore_symmetry_fast(m,n,B,LDA,thresh)
-  !call wall_time(tf)
-  !print*,''
-  !print*,'Restore_symmetry'
-  !print*,'Fast version:',tf-ti,'s'
-  !call wall_time(ti)
-
-!  if (.not.restore_symm) then
-!    return
-!  endif
-
-  ! TODO:  Costs O(n^4), but can be improved to (2 n^2 * log(n)):
-  ! - copy all values in a 1D array
-  ! - sort 1D array
-  ! - average nearby elements
-  ! - for all elements, find matching value in the sorted 1D array
-
-  allocate(done(m,n))
-
-  do j=1,n
-    do i=1,m
-      done(i,j) = A(i,j) == 0.d0
-    enddo
-  enddo
-
-  do j=1,n
-    do i=1,m
-      if ( done(i,j) ) cycle
-      done(i,j) = .True.
-      count = 1.d0
-      f = 1.d0/A(i,j)
-      do l=1,n
-        do k=1,m
-          if ( done(k,l) ) cycle
-          g = f * A(k,l)
-          if ( dabs(dabs(g) - 1.d0) < thresh2 ) then
-            count = count + 1.d0
-            if (g>0.d0) then
-              A(i,j) = A(i,j) + A(k,l)
-            else
-              A(i,j) = A(i,j) - A(k,l)
-            end if
-          endif
-        enddo
-      enddo
-      if (count > 1.d0) then
-        A(i,j) = A(i,j) / count
-        do l=1,n
-          do k=1,m
-            if ( done(k,l) ) cycle
-            g = f * A(k,l)
-            if ( dabs(dabs(g) - 1.d0) < thresh2 ) then
-              done(k,l) = .True.
-              if (g>0.d0) then
-                A(k,l) = A(i,j)
-              else
-                A(k,l) = -A(i,j)
-              end if
-            endif
-          enddo
-        enddo
-      endif
-
-    enddo
-  enddo
-
-  ! Debug
-  !call wall_time(tf)
-  !print*,'Old version:',tf-ti,'s'
-
-  !max_diff = 0d0
-  !do j = 1, n
-  !  do i = 1, n
-  !    if (dabs(A(i,j)-B(i,j)) > max_diff) then
-  !      max_diff = dabs(A(i,j)-B(i,j))
-  !    endif
-  !  enddo
-  !enddo
-  !print*,'Max diff:', max_diff
-  !deallocate(B)
-
-end
-
-subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
 
   implicit none
 
@@ -1794,58 +1689,53 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
   double precision, intent(in) :: thresh
 
   double precision, allocatable :: copy(:), copy_sign(:)
-  integer, allocatable :: key(:)
+  integer, allocatable :: key(:), ii(:), jj(:)
   integer :: sze, pi, pf, idx, i,j,k
   double precision :: average, val, thresh2
 
   thresh2 = dsqrt(thresh)
-  call nullify_small_elements(m,n,A,LDA,thresh)
- 
+
   sze = m * n
 
-  allocate(copy(sze),copy_sign(sze),key(sze))
-  
+  allocate(copy(sze),copy_sign(sze),key(sze),ii(sze),jj(sze))
+
   ! Copy to 1D
-  !$OMP PARALLEL &
-  !$OMP SHARED(A,m,n,sze,copy_sign,copy,key) &
+  !$OMP PARALLEL if (m>100) &
+  !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) &
   !$OMP PRIVATE(i,j,k) &
   !$OMP DEFAULT(NONE)
-  !$OMP DO
+  !$OMP DO COLLAPSE(2)
   do j = 1, n
     do i = 1, m
-      copy(i+(j-1)*m) = A(i,j)
+      k = i+(j-1)*m
+      copy(k) = A(i,j)
+      copy_sign(k) = sign(1.d0,copy(k))
+      copy(k) = -dabs(copy(k))
+      key(k) = k
+      ii(k) = i
+      jj(k) = j
     enddo
   enddo
   !$OMP END DO
-  ! Copy sign
-  !$OMP DO
-  do i = 1,sze
-    copy_sign(i) = sign(1d0,copy(i))
-    copy(i) = dabs(copy(i))
-  enddo
-  !$OMP END DO NOWAIT
-  ! Keys
-  !$OMP DO
-  do i = 1, sze
-   key(i) = i
-  enddo
-  !$OMP END DO
   !$OMP END PARALLEL
 
   ! Sort
   call dsort(copy,key,sze)
+  call iset_order(ii,key,sze)
+  call iset_order(jj,key,sze)
+  call dset_order(copy_sign,key,sze)
 
-  !TODO 
+  !TODO
   ! Parallelization with OMP
 
-  ! Jump all the elements below thresh
-  i = 1 
-  do while (copy(i) <= thresh) 
-    i = i + 1
-  enddo
+!  ! Skip all the elements below thresh
+!  i = 1
+!  do while (copy(i) <= thresh)
+!    i = i + 1
+!  enddo
 
   ! Symmetrize
-  do while(i < sze)
+  do while( (i < sze).and.(-copy(i) > thresh) )
     pi = i
     pf = i
     val = 1d0/copy(i)
@@ -1862,7 +1752,7 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
       do j = pi, pf
         average = average + copy(j)
       enddo
-      average = average / (pf-pi+1)
+      average = average / (pf-pi+1.d0)
       do j = pi, pf
         copy(j) = average
       enddo
@@ -1872,24 +1762,23 @@ subroutine restore_symmetry_fast(m,n,A,LDA,thresh)
     ! Update i
     i = i + 1
   enddo
+  copy(i:) = 0.d0
 
-  !$OMP PARALLEL &
-  !$OMP SHARED(m,sze,copy_sign,copy,key,A) &
+  !$OMP PARALLEL if (sze>10000) &
+  !$OMP SHARED(m,sze,copy_sign,copy,key,A,ii,jj) &
   !$OMP PRIVATE(i,j,k,idx) &
   !$OMP DEFAULT(NONE)
   ! copy -> A
   !$OMP DO
   do k = 1, sze
-      idx = key(k)
-      i = mod(idx-1,m) + 1
-      j = (idx-1) / m + 1
-      ! New value with the right sign
-      A(i,j) = sign(copy(k),copy_sign(idx))
+      i = ii(k)
+      j = jj(k)
+      A(i,j) = sign(copy(k),copy_sign(k))
   enddo
   !$OMP END DO
   !$OMP END PARALLEL
 
-  deallocate(copy,copy_sign,key)
+  deallocate(copy,copy_sign,key,ii,jj)
 
 end
 

From b101fd398df55a5395059563028e1455a9c1f12a Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 17 Jan 2023 13:38:50 +0100
Subject: [PATCH 33/68] Removed collapse

---
 src/utils/linear_algebra.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index c8a03e65..c1304698 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1704,7 +1704,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
   !$OMP SHARED(A,m,n,sze,copy_sign,copy,key,ii,jj) &
   !$OMP PRIVATE(i,j,k) &
   !$OMP DEFAULT(NONE)
-  !$OMP DO COLLAPSE(2)
+  !$OMP DO
   do j = 1, n
     do i = 1, m
       k = i+(j-1)*m

From e953cf44156151192decc6b461f358e7d98b6cc8 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Tue, 17 Jan 2023 13:40:59 +0100
Subject: [PATCH 34/68] avoid uninitialized phase in get_excitation

---
 src/determinants/slater_rules.irp.f | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index 9bab9213..10881ea5 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -126,6 +126,8 @@ subroutine get_excitation(det1,det2,exc,degree,phase,Nint)
       return
 
     case(0)
+      ! Avoid uninitialized phase
+      phase = 1d0 
       return
 
   end select

From 7dc17fd2d3c4e73117c40887f8c97f161720d19e Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 17 Jan 2023 16:33:40 +0100
Subject: [PATCH 35/68] trying to have coherent thresholds in cycle for TC ints

---
 src/ao_many_one_e_ints/grad2_jmu_manu.irp.f   | 38 +++++++++++--------
 .../grad_lapl_jmu_manu.irp.f                  | 16 ++++----
 src/non_h_ints_mu/new_grad_tc_manu.irp.f      |  1 +
 3 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
index d5210aa7..461583ca 100644
--- a/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad2_jmu_manu.irp.f
@@ -33,7 +33,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
      !$OMP          List_comb_thr_b3_cent, int2_grad1u2_grad2u2_j1b2_test, ao_abs_comb_b3_j1b,&
-     !$OMP          ao_overlap_abs,sq_pi_3_2)
+     !$OMP          ao_overlap_abs_grid,sq_pi_3_2)
  !$OMP DO SCHEDULE(dynamic)
  do ipoint = 1, n_points_final_grid
    r(1) = final_grid_points(1,ipoint)
@@ -41,7 +41,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
    r(3) = final_grid_points(3,ipoint)
    do i = 1, ao_num
      do j = i, ao_num
-       if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+       if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then
          cycle
        endif
   
@@ -61,7 +61,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test, (ao_num, ao_n
            call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
            coef_fit = -0.25d0 *  coef_gauss_1_erf_x_2(i_fit) * coef
 !           if(dabs(coef_fit*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
-           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
+           if(dabs(coef_fit*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-12)cycle
   
 !           call overlap_gauss_r12_ao_with1s_v(B_center, beta, final_grid_points_transp, &
 !                 expo_fit, i, j, int_fit_v, n_points_final_grid)
@@ -123,14 +123,14 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao
      !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,      &
      !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,    &
      !$OMP          List_comb_thr_b3_cent, big_array,&
-     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs)
+     !$OMP          ao_abs_comb_b3_j1b,ao_overlap_abs_grid)
 !
  allocate(int_fit_v(n_points_final_grid))
  !$OMP DO SCHEDULE(dynamic)
  do i = 1, ao_num
    do j = i, ao_num
 
-     if(ao_overlap_abs(j,i) .lt. 1.d-12) then
+     if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then
        cycle
      endif
 
@@ -139,7 +139,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2_test_v, (ao_num, ao
          coef        = List_comb_thr_b3_coef  (i_1s,j,i)
          beta        = List_comb_thr_b3_expo  (i_1s,j,i)
          int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-!         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
+         if(dabs(coef)*dabs(int_j1b).lt.1.d-15)cycle
          B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
          B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
          B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -213,7 +213,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
  !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
  !$OMP          coef_fit, expo_fit, int_fit, tmp, int_j1b,factor_ij_1s,beta_ij,center_ij_1s)          & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
- !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          final_grid_points, ng_fit_jast,ao_overlap_abs_grid,      &
  !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,sq_pi_3_2,       & 
  !$OMP          List_comb_thr_b3_cent, int2_u2_j1b2_test,ao_abs_comb_b3_j1b)
@@ -225,6 +225,9 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
 
     do i = 1, ao_num
       do j = i, ao_num
+       if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then
+         cycle
+       endif
 
 
         tmp = 0.d0
@@ -233,7 +236,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
           coef        = List_comb_thr_b3_coef  (i_1s,j,i)
           beta        = List_comb_thr_b3_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -245,7 +248,7 @@ BEGIN_PROVIDER [ double precision, int2_u2_j1b2_test, (ao_num, ao_num, n_points_
             !DIR$ FORCEINLINE
             call gaussian_product(expo_fit,r,beta,B_center,factor_ij_1s,beta_ij,center_ij_1s)
 !            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b).lt.1.d-10)cycle ! old version
-            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-10)cycle
+            if(dabs(coef_fit*coef*factor_ij_1s*int_j1b*sq_pi_3_2*(beta_ij)**(-3/2)).lt.1.d-12)cycle
           
             ! ---
           
@@ -307,7 +310,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
  !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
  !$OMP          tmp_x, tmp_y, tmp_z,int_j1b,sq_alpha)                        & 
  !$OMP SHARED  (n_points_final_grid, ao_num, List_comb_thr_b3_size, & 
- !$OMP          final_grid_points, ng_fit_jast,                     &
+ !$OMP          final_grid_points, ng_fit_jast, ao_overlap_abs_grid,&
  !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
  !$OMP          List_comb_thr_b3_coef, List_comb_thr_b3_expo,       & 
  !$OMP          List_comb_thr_b3_cent, int2_u_grad1u_x_j1b2_test,ao_abs_comb_b3_j1b,sq_pi_3_2)
@@ -320,6 +323,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
 
     do i = 1, ao_num
       do j = i, ao_num
+       if(ao_overlap_abs_grid(j,i) .lt. 1.d-12) then
+         cycle
+       endif
 
         tmp_x = 0.d0
         tmp_y = 0.d0
@@ -329,7 +335,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
           coef        = List_comb_thr_b3_coef  (i_1s,j,i)
           beta        = List_comb_thr_b3_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -353,7 +359,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2_test, (3, ao_num, ao_num
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
             sq_alpha = alpha_1s_inv * dsqrt(alpha_1s_inv)
 !            if(dabs(coef_tmp*int_j1b) .lt. 1d-10) cycle ! old version
-            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-10) cycle
+            if(dabs(coef_tmp*int_j1b*sq_pi_3_2*sq_alpha) .lt. 1d-14) cycle
             
             call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
 
@@ -432,7 +438,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
   do ipoint = 1, n_points_final_grid
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
         r(1) = final_grid_points(1,ipoint)
         r(2) = final_grid_points(2,ipoint)
         r(3) = final_grid_points(3,ipoint)
@@ -443,7 +449,7 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
           coef        = List_comb_thr_b3_coef  (i_1s,j,i)
           beta        = List_comb_thr_b3_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b3_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b3_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b3_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b3_cent(3,i_1s,j,i)
@@ -465,9 +471,9 @@ BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2_test, (ao_num, ao_num, n_p
             centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
 
             expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            if(expo_coef_1s .gt. 20.d0) cycle
+            if(expo_coef_1s .gt. 80.d0) cycle
             coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            if(dabs(coef_tmp) .lt. 1d-08) cycle
+            if(dabs(coef_tmp) .lt. 1d-10) cycle
 
             int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
 
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
index f71a66e6..58a670e0 100644
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
+++ b/src/ao_many_one_e_ints/grad_lapl_jmu_manu.irp.f
@@ -38,7 +38,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
 
         tmp = 0.d0
         do i_1s = 1, List_comb_thr_b2_size(j,i)
@@ -46,7 +46,7 @@ BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b_test, (ao_num, ao_num,
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
@@ -143,7 +143,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-10)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
 
         tmp_x = 0.d0
         tmp_y = 0.d0
@@ -153,7 +153,7 @@ BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b_test, (3, ao_num
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
@@ -244,7 +244,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
 
         tmp = 0.d0
         do i_1s = 1, List_comb_thr_b2_size(j,i)
@@ -252,7 +252,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_test, (ao_num, ao_num, n_po
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
@@ -337,7 +337,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num,
 
     do i = 1, ao_num
       do j = i, ao_num
-        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-20)cycle
+        if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
 
         tmp = 0.d0
         do i_1s = 1, List_comb_thr_b2_size(j,i)
@@ -345,7 +345,7 @@ BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b_ng_1_test, (ao_num, ao_num,
           coef        = List_comb_thr_b2_coef  (i_1s,j,i)
           beta        = List_comb_thr_b2_expo  (i_1s,j,i)
           int_j1b = ao_abs_comb_b2_j1b(i_1s,j,i)
-          if(dabs(coef)*dabs(int_j1b).lt.1.d-10)cycle
+          if(dabs(coef)*dabs(int_j1b).lt.1.d-14)cycle
           B_center(1) = List_comb_thr_b2_cent(1,i_1s,j,i)
           B_center(2) = List_comb_thr_b2_cent(2,i_1s,j,i)
           B_center(3) = List_comb_thr_b2_cent(3,i_1s,j,i)
diff --git a/src/non_h_ints_mu/new_grad_tc_manu.irp.f b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
index cceb0991..a2287f66 100644
--- a/src/non_h_ints_mu/new_grad_tc_manu.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc_manu.irp.f
@@ -43,6 +43,7 @@ BEGIN_PROVIDER [ double precision, int2_grad1_u12_ao_test, (3, ao_num, ao_num, n
   
       do j = 1, ao_num
         do i = 1, ao_num
+!         if(dabs(ao_overlap_abs_grid(j,i)).lt.1.d-12)cycle
 
           tmp1 = tmp0 * v_ij_erf_rk_cst_mu_j1b_test(i,j,ipoint)
           tmp2 = v_ij_u_cst_mu_j1b_test(i,j,ipoint)

From 66ce35f5dbfbea8f07daf3a631faf8d2bcdc269c Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Wed, 18 Jan 2023 09:05:16 +0100
Subject: [PATCH 36/68] bugfix in restore_symmetry

---
 src/utils/linear_algebra.irp.f | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/utils/linear_algebra.irp.f b/src/utils/linear_algebra.irp.f
index c1304698..44344a19 100644
--- a/src/utils/linear_algebra.irp.f
+++ b/src/utils/linear_algebra.irp.f
@@ -1735,6 +1735,7 @@ subroutine restore_symmetry(m,n,A,LDA,thresh)
 !  enddo
 
   ! Symmetrize
+  i = 1
   do while( (i < sze).and.(-copy(i) > thresh) )
     pi = i
     pf = i

From 7eb14fc0f836b0b98e23b701103652fb8e580ef1 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 18 Jan 2023 11:20:39 +0100
Subject: [PATCH 37/68] added point charges with the python script to write the
 point charges

---
 src/ao_one_e_ints/EZFIO.cfg           |  18 ++
 src/ao_one_e_ints/point_charges.irp.f | 272 ++++++++++++++++++++++++++
 src/ao_one_e_ints/write_pt_charges.py |  73 +++++++
 3 files changed, 363 insertions(+)
 create mode 100644 src/ao_one_e_ints/point_charges.irp.f
 create mode 100755 src/ao_one_e_ints/write_pt_charges.py

diff --git a/src/ao_one_e_ints/EZFIO.cfg b/src/ao_one_e_ints/EZFIO.cfg
index 8d4fff57..8ad83bd4 100644
--- a/src/ao_one_e_ints/EZFIO.cfg
+++ b/src/ao_one_e_ints/EZFIO.cfg
@@ -106,3 +106,21 @@ interface: ezfio,provider,ocaml
 default: 1.e-15
 ezfio_name: threshold_ao
 
+[n_pts_charge]
+type: integer
+doc: Number of point charges to be added to the potential
+interface: ezfio
+default: 0
+
+[pts_charge_z]
+type: double precision
+doc: Charge associated to each point charge
+interface: ezfio
+size: (ao_one_e_ints.n_pts_charge)
+
+[pts_charge_coord]
+type: double precision
+doc: Coordinate of each point charge.
+interface: ezfio
+size: (ao_one_e_ints.n_pts_charge,3)
+
diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f
new file mode 100644
index 00000000..aa80e167
--- /dev/null
+++ b/src/ao_one_e_ints/point_charges.irp.f
@@ -0,0 +1,272 @@
+
+! ---
+
+
+BEGIN_PROVIDER [ integer, n_pts_charge  ]
+  implicit none
+  BEGIN_DOC
+! Number of point charges to be added to the potential
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+    
+    call ezfio_has_ao_one_e_ints_n_pts_charge(has)
+    if (has) then
+      write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
+      call ezfio_get_ao_one_e_ints_n_pts_charge(n_pts_charge)
+    else
+      print *, 'ao_one_e_ints/n_pts_charge not found in EZFIO file'
+      stop 1
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_pts_charge with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
+
+  BEGIN_DOC
+  ! Charge associated to each point charge.
+  ! Zero-filled when the EZFIO file holds no ao_one_e_ints/pts_charge_z entry.
+  END_DOC
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  ! Only the master rank queries the EZFIO file.
+  if (mpi_master) then
+    call ezfio_has_ao_one_e_ints_pts_charge_z(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    ! Share the presence flag so that every rank takes the same branch below.
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_z with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
+      call ezfio_get_ao_one_e_ints_pts_charge_z(pts_charge_z)
+    endif
+    ! Collective call: it must be reached by all ranks, not by the master only.
+    IRP_IF MPI
+      call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read pts_charge_z with MPI'
+      endif
+    IRP_ENDIF
+
+  else
+
+    integer :: i
+    do i = 1, n_pts_charge
+      pts_charge_z(i) = 0.d0
+    enddo
+
+  endif
+ print*,'Point charges '
+ do i = 1, n_pts_charge
+  print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
+ enddo
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
+
+  BEGIN_DOC
+  ! Coordinates of each point charge, stored as (charge index, {x,y,z}).
+  ! Zero-filled when the EZFIO file holds no ao_one_e_ints/pts_charge_coord entry.
+  END_DOC
+  implicit none
+  logical :: exists
+
+  PROVIDE ezfio_filename
+
+  ! Only the master rank queries the EZFIO file.
+  if (mpi_master) then
+    call ezfio_has_ao_one_e_ints_pts_charge_coord(exists)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    ! Share the presence flag so that every rank takes the same branch below.
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_coord with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+
+    if (mpi_master) then
+     double precision, allocatable  :: buffer(:,:)
+     allocate (buffer(n_pts_charge,3))
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
+      call ezfio_get_ao_one_e_ints_pts_charge_coord(buffer)
+      integer :: i,j
+      do i=1,3
+        do j=1,n_pts_charge
+          pts_charge_coord(j,i) = buffer(j,i)
+        enddo
+      enddo
+     deallocate(buffer)
+    endif
+    ! Collective call reached by all ranks; the array holds 3*n_pts_charge values.
+    IRP_IF MPI
+      call MPI_BCAST(pts_charge_coord, (3*n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read pts_charge_coord with MPI'
+      endif
+    IRP_ENDIF
+
+  else
+
+    do i = 1, n_pts_charge
+      pts_charge_coord(i,:) = 0.d0
+    enddo
+
+  endif
+ print*,'Coordinates for the point charges '
+ do i = 1, n_pts_charge
+  write(*,'(I3,1X,3(F16.8,1X))') i,pts_charge_coord(i,1:3)
+ enddo
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !  Point charge-electron interaction, in the |AO| basis set.
+  !
+  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
+  !
+  !  Pseudopotential integrals are NOT added here (that code is commented out below).
+  END_DOC
+
+  implicit none
+  integer          :: num_A, num_B, power_A(3), power_B(3)
+  integer          :: i, j, k, l, n_pt_in, m
+  double precision :: alpha, beta
+  double precision :: A_center(3),B_center(3),C_center(3)
+  double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
+
+  ao_integrals_pt_chrg = 0.d0
+
+!  if (read_ao_integrals_pt_chrg) then
+!
+!    call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals read from disk'
+!
+!  else
+
+!    if(use_cosgtos) then
+!      !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
+!
+!      do j = 1, ao_num
+!        do i = 1, ao_num
+!          ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
+!        enddo
+!      enddo
+!
+!    else
+
+      !$OMP PARALLEL                                                   &
+          !$OMP DEFAULT (NONE)                                         &
+          !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
+          !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
+          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,&
+          !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
+
+      n_pt_in = n_pt_max_integrals
+
+      !$OMP DO SCHEDULE (dynamic)
+
+      do j = 1, ao_num
+        num_A = ao_nucl(j)
+        power_A(1:3)= ao_power(j,1:3)
+        A_center(1:3) = pts_charge_coord(num_A,1:3)
+
+        do i = 1, ao_num
+
+          num_B = ao_nucl(i)
+          power_B(1:3)= ao_power(i,1:3)
+          B_center(1:3) = pts_charge_coord(num_B,1:3)
+
+          do l=1,ao_prim_num(j)
+            alpha = ao_expo_ordered_transp(l,j)
+
+            do m=1,ao_prim_num(i)
+              beta = ao_expo_ordered_transp(m,i)
+
+              double precision               :: c, c1
+              c = 0.d0
+
+              do  k = 1, n_pts_charge
+                double precision               :: Z
+                Z = pts_charge_z(k)
+
+                C_center(1:3) = pts_charge_coord(k,1:3)
+
+                c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
+                                 , alpha, beta, C_center, n_pt_in )
+
+                c = c - Z * c1
+
+              enddo
+              ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
+                  + ao_coef_normalized_ordered_transp(l,j)             &
+                  * ao_coef_normalized_ordered_transp(m,i) * c
+            enddo
+          enddo
+        enddo
+      enddo
+
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+!    endif
+
+
+!    IF(do_pseudo) THEN
+!       ao_integrals_pt_chrg += ao_pseudo_integrals
+!    ENDIF
+
+!  endif
+
+
+!  if (write_ao_integrals_pt_chrg) then
+!    call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals written to disk'
+!  endif
+
+END_PROVIDER
diff --git a/src/ao_one_e_ints/write_pt_charges.py b/src/ao_one_e_ints/write_pt_charges.py
new file mode 100755
index 00000000..086f1e52
--- /dev/null
+++ b/src/ao_one_e_ints/write_pt_charges.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python  
+import os
+import sys
+
+
+def zip_in_ezfio(ezfio,tmp):
+  tmpzip=tmp+".gz"
+  # Compress tmp, drop the plain file, then move the archive into ezfio/ao_one_e_ints/.
+  os.system("gzip -c "+tmp+" > "+tmpzip)
+  os.system("rm "+tmp)
+  # Use the ezfio argument rather than the module-level EZFIO global.
+  os.system("mv "+tmpzip+" "+ezfio+"/ao_one_e_ints/"+tmpzip)
+
+def mv_in_ezfio(ezfio,tmp):
+  # Move tmp into ezfio/ao_one_e_ints/ (uses the argument, not the EZFIO global).
+  os.system("mv "+tmp+" "+ezfio+"/ao_one_e_ints/"+tmp)
+
+
+# Getting the EZFIO: strip only trailing slashes so that paths with directories keep working
+EZFIO=sys.argv[1]
+EZFIO=EZFIO.rstrip("/")
+print(EZFIO)
+
+# Reading the point charges and convert the Angstrom geometry in Bohr for QP
+f = open('point_charges.xyz','r')
+lines = f.readlines()
+convert_angs_to_bohr=1.88973
+n_charges=0
+coord_x=[]
+coord_y=[]
+coord_z=[]
+charges=[]
+for line in lines:
+  data = line.split()
+  if(len(data)>0):
+   n_charges += 1
+   charges.append(str(data[0]))
+   coord_x.append(str(convert_angs_to_bohr*float(data[1])))
+   coord_y.append(str(convert_angs_to_bohr*float(data[2])))
+   coord_z.append(str(convert_angs_to_bohr*float(data[3])))
+
+# Write the file containing the number of charges and set in EZFIO folder
+tmp="n_pts_charge"
+fncharges = open(tmp,'w')
+fncharges.write(" "+str(n_charges)+'\n')
+fncharges.close()
+mv_in_ezfio(EZFIO,tmp)
+
+# Write the file containing the charges and set in EZFIO folder 
+tmp="pts_charge_z"
+fcharges = open(tmp,'w')
+fcharges.write(" 1\n")
+fcharges.write(" "+str(n_charges)+'\n')
+for i in range(n_charges):
+ fcharges.write(charges[i]+'\n')
+fcharges.close()
+zip_in_ezfio(EZFIO,tmp)
+
+# Write the file containing the charge coordinates and set in EZFIO folder
+tmp="pts_charge_coord"
+fcoord = open(tmp,'w')
+fcoord.write("  2\n")
+fcoord.write("                   "+str(n_charges)+'                    3\n')
+#fcoord.write(" "+'   3 '+str(n_charges)+' \n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_x[i]+'\n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_y[i]+'\n')
+for i in range(n_charges):
+ fcoord.write('   '+coord_z[i]+'\n')
+fcoord.close()
+zip_in_ezfio(EZFIO,tmp)
+

From 8ea1b5c023de6acfdde896baf1a9cd24ed1dd556 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 18 Jan 2023 11:36:15 +0100
Subject: [PATCH 38/68] added the point charges

---
 src/ao_one_e_ints/point_charges.irp.f |  8 ++++----
 src/ao_one_e_ints/write_pt_charges.py | 10 ++++++++--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f
index aa80e167..c038458d 100644
--- a/src/ao_one_e_ints/point_charges.irp.f
+++ b/src/ao_one_e_ints/point_charges.irp.f
@@ -204,7 +204,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
           !$OMP DEFAULT (NONE)                                         &
           !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
           !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
-          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,&
+          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
           !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
 
       n_pt_in = n_pt_max_integrals
@@ -214,13 +214,13 @@ BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
       do j = 1, ao_num
         num_A = ao_nucl(j)
         power_A(1:3)= ao_power(j,1:3)
-        A_center(1:3) = pts_charge_coord(num_A,1:3)
+        A_center(1:3) = nucl_coord(num_A,1:3)
 
         do i = 1, ao_num
 
           num_B = ao_nucl(i)
           power_B(1:3)= ao_power(i,1:3)
-          B_center(1:3) = pts_charge_coord(num_B,1:3)
+          B_center(1:3) = nucl_coord(num_B,1:3)
 
           do l=1,ao_prim_num(j)
             alpha = ao_expo_ordered_transp(l,j)
@@ -240,7 +240,7 @@ BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
                 c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
                                  , alpha, beta, C_center, n_pt_in )
 
-                c = c - Z * c1
+                c = c + Z * c1
 
               enddo
               ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
diff --git a/src/ao_one_e_ints/write_pt_charges.py b/src/ao_one_e_ints/write_pt_charges.py
index 086f1e52..d4b6d251 100755
--- a/src/ao_one_e_ints/write_pt_charges.py
+++ b/src/ao_one_e_ints/write_pt_charges.py
@@ -2,6 +2,11 @@
 import os
 import sys
 
+# First argument is the EZFIO file
+# It reads a file EZFIO_point_charges.xyz written in this way:
+# charge x y z (Angstrom)
+# for all charges
+
 
 def zip_in_ezfio(ezfio,tmp):
   tmpzip=tmp+".gz"
@@ -22,9 +27,10 @@ EZFIO=EZFIO.replace("/", "")
 print(EZFIO)
 
 # Reading the point charges and convert the Angstrom geometry in Bohr for QP
-f = open('point_charges.xyz','r')
+f = open(EZFIO+'_point_charges.xyz','r')
 lines = f.readlines()
-convert_angs_to_bohr=1.88973
+convert_angs_to_bohr=1.8897259885789233
+
 n_charges=0
 coord_x=[]
 coord_y=[]

From ec05b8c32940b718b395841ad07dd7c4e4694d38 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Wed, 18 Jan 2023 11:40:14 +0100
Subject: [PATCH 39/68] added the keyword for point charges in ao_one_e_ints

---
 src/ao_one_e_ints/EZFIO.cfg         | 5 +++++
 src/ao_one_e_ints/pot_ao_ints.irp.f | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/src/ao_one_e_ints/EZFIO.cfg b/src/ao_one_e_ints/EZFIO.cfg
index 8ad83bd4..262301e0 100644
--- a/src/ao_one_e_ints/EZFIO.cfg
+++ b/src/ao_one_e_ints/EZFIO.cfg
@@ -124,3 +124,8 @@ doc: Coordinate of each point charge.
 interface: ezfio
 size: (ao_one_e_ints.n_pts_charge,3)
 
+[point_charges]
+type: logical
+doc: If |true|, point charges (see ao_one_e_ints/write_pt_charges.py) are added to the one-electron potential
+interface: ezfio,provider,ocaml
+default: False
diff --git a/src/ao_one_e_ints/pot_ao_ints.irp.f b/src/ao_one_e_ints/pot_ao_ints.irp.f
index 928053ad..20e299af 100644
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@@ -104,6 +104,9 @@ BEGIN_PROVIDER [ double precision, ao_integrals_n_e, (ao_num,ao_num)]
     IF(do_pseudo) THEN
        ao_integrals_n_e += ao_pseudo_integrals
     ENDIF
+    IF(point_charges) THEN
+       ao_integrals_n_e += ao_integrals_pt_chrg
+    ENDIF 
 
   endif
 

From 2e45413f44ae3cf7d1aea0167efd4cfca915b406 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 19 Jan 2023 17:59:10 +0100
Subject: [PATCH 40/68] added optimization for Slater_tc in two-e elements

---
 src/bi_ort_ints/total_twoe_pot.irp.f   |  26 ++++
 src/determinants/slater_rules.irp.f    |   8 +-
 src/tc_bi_ortho/slater_tc_opt.irp.f    | 208 +++++++++++++++++++++++++
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 119 +-------------
 4 files changed, 246 insertions(+), 115 deletions(-)
 create mode 100644 src/tc_bi_ortho/slater_tc_opt.irp.f

diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f
index 31cf0624..fef43f93 100644
--- a/src/bi_ort_ints/total_twoe_pot.irp.f
+++ b/src/bi_ort_ints/total_twoe_pot.irp.f
@@ -199,3 +199,29 @@ END_PROVIDER
 
 ! ---
 
+
+ BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_exchange, (mo_num,mo_num) ]
+&BEGIN_PROVIDER [ double precision, mo_bi_ortho_tc_two_e_jj_anti, (mo_num,mo_num) ]
+  implicit none
+  BEGIN_DOC
+  ! mo_bi_ortho_tc_two_e_jj(i,j) = J_ij = <ji|W-K|ji>
+  ! mo_bi_ortho_tc_two_e_jj_exchange(i,j) = K_ij = <ij|W-K|ji>
+  ! mo_bi_ortho_tc_two_e_jj_anti(i,j) = J_ij - K_ij
+  END_DOC
+
+  integer                        :: i,j
+  double precision               :: get_two_e_integral
+
+  mo_bi_ortho_tc_two_e_jj = 0.d0
+  mo_bi_ortho_tc_two_e_jj_exchange = 0.d0
+
+  do i=1,mo_num
+    do j=1,mo_num
+      mo_bi_ortho_tc_two_e_jj(i,j) = mo_bi_ortho_tc_two_e(j,i,j,i)
+      mo_bi_ortho_tc_two_e_jj_exchange(i,j) = mo_bi_ortho_tc_two_e(i,j,j,i)
+      mo_bi_ortho_tc_two_e_jj_anti(i,j) = mo_bi_ortho_tc_two_e_jj(i,j) - mo_bi_ortho_tc_two_e_jj_exchange(i,j)
+    enddo
+  enddo
+
+END_PROVIDER
diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index b9710fd1..78607b7c 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -1790,12 +1790,12 @@ double precision function diag_H_mat_elem(det_in,Nint)
   integer                        :: tmp(2)
   !DIR$ FORCEINLINE
   call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
-  ASSERT (tmp(1) == nexc(1))
-  ASSERT (tmp(2) == nexc(2))
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
   !DIR$ FORCEINLINE
   call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
-  ASSERT (tmp(1) == nexc(1))
-  ASSERT (tmp(2) == nexc(2))
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
 
   det_tmp = ref_bitmask
   do ispin=1,2
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
new file mode 100644
index 00000000..0374cb81
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -0,0 +1,208 @@
+ BEGIN_PROVIDER [ double precision, ref_tc_energy_tot]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_1e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_2e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_3e]
+ implicit none
+ BEGIN_DOC
+! Various component of the TC energy for the reference "HF" Slater determinant
+ END_DOC 
+ double precision :: hmono, htwoe, htot, hthree
+ call diag_htilde_mu_mat_bi_ortho(N_int,HF_bitmask , hmono, htwoe, htot)
+ ref_tc_energy_1e = hmono
+ ref_tc_energy_2e = htwoe 
+ if(three_body_h_tc)then
+  call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, hthree)
+  ref_tc_energy_3e = hthree
+ else
+  ref_tc_energy_3e = 0.d0
+ endif
+ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e
+ END_PROVIDER 
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot)
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$, starting from the ref_tc_energy_* components
+  ! and applying one ac_tc_operator / a_tc_operator pair per excited electron.
+  ! hmono, htwoe, hthree and htot are all defined on every return path.
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: hmono,htwoe,htot,hthree
+
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+  ! Holes and particles of det_in with respect to ref_bitmask, for each spin.
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  ! Seed the components first: the early return must not leave intent(out) data undefined.
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e
+  hthree= ref_tc_energy_3e
+  if (nexc(1)+nexc(2) == 0) then
+    htot = ref_tc_energy_tot
+    return
+  endif
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta
+
+  det_tmp = ref_bitmask
+  do ispin=1,2
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      !DIR$ FORCEINLINE
+      call ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe+hthree
+end
+
+subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the ADDITION of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! onto a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the ADDITION of that electron 
+  !
+  ! and hmono, htwoe are INCREMENTED; na is increased by one; hthree is not modified here
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  hmono = hmono + mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+  na = na+1
+end
+
+subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes one- and two-body energy corresponding 
+  ! 
+  ! to the REMOVAL of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! 
+  ! from a determinant 'key'.
+  !
+  ! in output, the determinant key is changed by the REMOVAL of that electron 
+  !
+  ! and hmono, htwoe are DECREMENTED; na is decreased by one; hthree is not modified here
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+end
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 2d71b6b2..094c9bbc 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -11,121 +11,18 @@ program tc_bi_ortho
   touch read_wf
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
- ! call routine_2
-  call test_rout
+  call test_slater_tc_opt
 end
 
-subroutine test_rout
+subroutine test_slater_tc_opt
  implicit none
- integer :: i,j,ii,jj
-  use bitmasks ! you need to include the bitmasks_module.f90 features
-  integer(bit_kind), allocatable :: det_i(:,:)
-  allocate(det_i(N_int,2))
-  det_i(:,:)= psi_det(:,:,1)
-  call debug_det(det_i,N_int)
-  integer, allocatable           :: occ(:,:)
-  integer                        :: n_occ_ab(2)
-  allocate(occ(N_int*bit_kind_size,2))
-  call bitstring_to_list_ab(det_i, occ, n_occ_ab, N_int)
-  double precision :: hmono, htwoe, htot
-  call diag_htilde_mu_mat_bi_ortho(N_int, det_i, hmono, htwoe, htot)
-  print*,'hmono, htwoe, htot'
-  print*, hmono, htwoe, htot 
-  print*,'alpha electrons orbital occupancy'
-  do i = 1, n_occ_ab(1) ! browsing the alpha electrons
-    j = occ(i,1)
-    print*,j,mo_bi_ortho_tc_one_e(j,j)
-  enddo
-  print*,'beta  electrons orbital occupancy'
-  do i = 1, n_occ_ab(2) ! browsing the beta  electrons
-    j = occ(i,2)
-    print*,j,mo_bi_ortho_tc_one_e(j,j)
-  enddo
-  print*,'alpha beta'
-  do i = 1, n_occ_ab(1)
-   ii = occ(i,1)
-   do j = 1, n_occ_ab(2)
-    jj = occ(j,2)
-    print*,ii,jj,mo_bi_ortho_tc_two_e(jj,ii,jj,ii) 
-   enddo
-  enddo
-  print*,'alpha alpha'
-  do i = 1, n_occ_ab(1)
-   ii = occ(i,1)
-   do j = 1, n_occ_ab(1)
-    jj = occ(j,1)
-    print*,ii,jj,mo_bi_ortho_tc_two_e(jj,ii,jj,ii), mo_bi_ortho_tc_two_e(ii,jj,jj,ii)
-   enddo
-  enddo
-
-  print*,'beta beta'
-  do i = 1, n_occ_ab(2)
-   ii = occ(i,2)
-   do j = 1, n_occ_ab(2)
-    jj = occ(j,2)
-    print*,ii,jj,mo_bi_ortho_tc_two_e(jj,ii,jj,ii), mo_bi_ortho_tc_two_e(ii,jj,jj,ii)
-   enddo
-  enddo
- 
-
-end
-
-subroutine routine_2
- implicit none
- integer :: i
- double precision :: bi_ortho_mo_ints
- print*,'H matrix'
+ integer :: i,j
+ double precision :: hmono, htwoe, htot, hthree 
+ double precision :: hnewmono, hnewtwoe, hnewthnewree, hnewtot
  do i = 1, N_det
-  write(*,'(1000(F16.5,X))')htilde_matrix_elmt_bi_ortho(:,i)
- enddo
- i = 1
- double precision :: phase
- integer :: degree,h1, p1, h2, p2, s1, s2, exc(0:2,2,2)
- call get_excitation_degree(ref_bitmask, psi_det(1,1,i), degree, N_int)
- if(degree==2)then
-  call get_double_excitation(ref_bitmask, psi_det(1,1,i), exc, phase, N_int)
-  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
-  print*,'h1,h2,p1,p2'
-  print*, h1,h2,p1,p2 
-  print*,mo_bi_ortho_tc_two_e(p1,p2,h1,h2),mo_bi_ortho_tc_two_e(h1,h2,p1,p2)
- endif
-
- 
- print*,'coef'
- do i = 1, ao_num
-  print*,i,mo_l_coef(i,8),mo_r_coef(i,8)
- enddo
-! print*,'mdlqfmlqgmqglj'
-! print*,'mo_bi_ortho_tc_two_e()',mo_bi_ortho_tc_two_e(2,2,3,3)
-! print*,'bi_ortho_mo_ints      ',bi_ortho_mo_ints(2,2,3,3)
- print*,'Overlap'
- do i = 1, mo_num
-  write(*,'(100(F16.10,X))')overlap_bi_ortho(:,i)
+  call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
+  call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
+  print*,htot,hnewtot,dabs(htot-hnewtot) 
  enddo
 
 end
-
-subroutine routine
- implicit none
- double precision :: hmono,htwoe,hthree,htot
- integer(bit_kind), allocatable  :: key1(:,:)
- integer(bit_kind), allocatable  :: key2(:,:)
- allocate(key1(N_int,2),key2(N_int,2))
- use bitmasks
- key1 = ref_bitmask
- call htilde_mu_mat_bi_ortho(key1,key1, N_int, hmono,htwoe,hthree,htot)
- key2 = key1
- integer :: h,p,i_ok
- h = 1
- p = 8
- call do_single_excitation(key2,h,p,1,i_ok) 
- call debug_det(key2,N_int)
- call htilde_mu_mat_bi_ortho(key2,key1, N_int, hmono,htwoe,hthree,htot)
-! print*,'fock_matrix_tc_mo_alpha(p,h) =  ',fock_matrix_tc_mo_alpha(p,h)
- print*,'htot                         =  ',htot
- print*,'hmono                        =  ',hmono 
- print*,'htwoe                        =  ',htwoe
- double precision :: bi_ortho_mo_ints
- print*,'bi_ortho_mo_ints(1,p,1,h)',bi_ortho_mo_ints(1,p,1,h)
-
-end

From 1651242fba9b0450003c32ea6c868b28f77f8443 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 19 Jan 2023 18:45:43 +0100
Subject: [PATCH 41/68] beginning to optimize the single excitations on tc

---
 src/tc_bi_ortho/slater_tc_opt.irp.f        |   1 +
 src/tc_bi_ortho/slater_tc_opt_single.irp.f | 202 +++++++++++++++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 src/tc_bi_ortho/slater_tc_opt_single.irp.f

diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
index 0374cb81..50886fe3 100644
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -206,3 +206,4 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
   enddo
 
 end
+
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
new file mode 100644
index 00000000..a69f5d2e
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -0,0 +1,202 @@
+
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int, phase
+  double precision              :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2), key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono = 0.d0
+  htwoe= 0.d0
+  htot = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  call get_single_excitation_from_fock_tc(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,hthree,htot)
+end
+
+ BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)]
+&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! tc_2e_3idx_coulomb_integrals(j,k,i)  = <jk|ji> 
+ !
+ ! tc_2e_3idx_exchange_integrals(j,k,i) = <kj|ji> 
+ END_DOC
+ integer :: i,j,k,l
+ double precision :: get_two_e_integral
+ double precision :: integral
+
+ do k = 1, mo_num
+  do i = 1, mo_num
+   do j = 1, mo_num
+     tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) 
+     tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) 
+   enddo
+  enddo
+ enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+! Closed-shell part of the Fock operator for the TC operator
+ END_DOC
+ integer :: h0,p0,h,p,k0,k,i
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_test(N_int)
+ integer(bit_kind) :: key_virt(N_int,2)
+
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = full_ijkl_bitmask(i)
+  key_virt(i,2) = full_ijkl_bitmask(i)
+  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
+ enddo
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+ ! docc ---> virt single excitations
+ do h0 = 1,  n_occ_ab(1)
+  h=occ(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ ! virt ---> virt single excitations
+ do h0 = 1,  n_occ_ab_virt(1)
+  h=occ_virt(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+ ! docc ---> docc single excitations
+ do h0 = 1,  n_occ_ab(1)
+  h=occ(h0,1)
+  do p0 = 1, n_occ_ab(1)
+   p = occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+END_PROVIDER
+
+
+subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,htwoe,hthree,htot)
+ use bitmasks
+ implicit none  ! builds the h -> p single-excitation terms from the closed-shell Fock matrix
+ integer,intent(in) :: h,p,spin  ! orbital indices used as (p,h); spin selects the exchange lists
+ double precision, intent(in)  :: phase  ! factor multiplying hmono and htwoe before returning
+ integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2)  ! key_j is not read in this routine
+ double precision, intent(out) :: hmono,htwoe,hthree,htot
+ integer(bit_kind) :: differences(N_int,2)  ! key_i xor ref_closed_shell_bitmask
+ integer(bit_kind) :: hole(N_int,2)  ! bits set in ref_closed_shell_bitmask but not in key_i
+ integer(bit_kind) :: partcl(N_int,2)  ! bits set in key_i but not in ref_closed_shell_bitmask
+ integer :: occ_hole(N_int*bit_kind_size,2)  ! orbital lists decoded from hole
+ integer :: occ_partcl(N_int*bit_kind_size,2)  ! orbital lists decoded from partcl
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)  ! lengths of the two lists above, per spin
+ integer :: i0,i
+ double precision :: buffer_c(mo_num),buffer_x(mo_num)  ! (:,p,h) slices of the 3-index integrals
+ do i=1, mo_num
+   buffer_c(i) = tc_2e_3idx_coulomb_integrals(i,p,h)
+   buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h)
+ enddo
+ do i = 1, N_int
+  differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),key_i(i,1))
+  partcl(i,2) = iand(differences(i,2),key_i(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)  ! closed-shell value, corrected below for the holes/particles of key_i
+ ! holes :: direct terms (both spins): subtract the Coulomb slice of every hole orbital
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  htwoe -= buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  htwoe -= buffer_c(i)
+ enddo
+
+ ! holes :: exchange terms (list of spin "spin" only): add back the exchange slice
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  htwoe += buffer_x(i)
+ enddo
+
+ ! particles :: direct terms (both spins): add the Coulomb slice of every particle orbital
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  htwoe += buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  htwoe += buffer_c(i)
+ enddo
+
+ ! particles :: exchange terms (list of spin "spin" only): subtract the exchange slice
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  htwoe -= buffer_x(i)
+ enddo
+ hthree = 0.d0
+ htwoe = htwoe * phase
+ hmono = hmono * phase
+ htot  = htwoe + hmono + hthree
+
+end
+

From 4ee080215068c055603c24989f78ba6ceb25a43b Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 19 Jan 2023 19:29:26 +0100
Subject: [PATCH 42/68] two body part up to single excitations work with fock

---
 src/bi_ort_ints/total_twoe_pot.irp.f       |  23 +++
 src/determinants/single_excitations.irp.f  |   2 +-
 src/tc_bi_ortho/slater_tc.irp.f            |   3 +
 src/tc_bi_ortho/slater_tc_opt_single.irp.f | 214 ++++++++++++---------
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f     |  20 +-
 5 files changed, 165 insertions(+), 97 deletions(-)

diff --git a/src/bi_ort_ints/total_twoe_pot.irp.f b/src/bi_ort_ints/total_twoe_pot.irp.f
index fef43f93..e74c6d2a 100644
--- a/src/bi_ort_ints/total_twoe_pot.irp.f
+++ b/src/bi_ort_ints/total_twoe_pot.irp.f
@@ -224,4 +224,27 @@ END_PROVIDER
     enddo
   enddo
 
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)]
+&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+ ! Three-index slices of the four-index array mo_bi_ortho_tc_two_e
+ !
+ ! tc_2e_3idx_coulomb_integrals(j,k,i)  = mo_bi_ortho_tc_two_e(j,k,j,i), noted <jk|ji>
+ !
+ ! tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k,j,j,i), noted <kj|ji>
+ END_DOC
+ integer :: i,j,k
+ ! j is the innermost loop because it is the first (fastest-varying) index of both arrays
+ do i = 1, mo_num
+  do k = 1, mo_num
+   do j = 1, mo_num
+     tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) 
+     tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) 
+   enddo
+  enddo
+ enddo
+
 END_PROVIDER
diff --git a/src/determinants/single_excitations.irp.f b/src/determinants/single_excitations.irp.f
index ccfeaa2e..1c25e314 100644
--- a/src/determinants/single_excitations.irp.f
+++ b/src/determinants/single_excitations.irp.f
@@ -28,7 +28,7 @@ BEGIN_PROVIDER [double precision, fock_operator_closed_shell_ref_bitmask, (mo_nu
  integer :: occ_virt(N_int*bit_kind_size,2)
  integer(bit_kind) :: key_test(N_int)
  integer(bit_kind) :: key_virt(N_int,2)
-
+ fock_operator_closed_shell_ref_bitmask = 0.d0
  call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
  do i = 1, N_int
   key_virt(i,1) = full_ijkl_bitmask(i)
diff --git a/src/tc_bi_ortho/slater_tc.irp.f b/src/tc_bi_ortho/slater_tc.irp.f
index 33b738ba..2c0ae2ca 100644
--- a/src/tc_bi_ortho/slater_tc.irp.f
+++ b/src/tc_bi_ortho/slater_tc.irp.f
@@ -324,6 +324,9 @@ subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
 
   call get_single_excitation(key_i, key_j, exc, phase, Nint)
   call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+!  if(h1==14.and.p1==2)then
+!   print*,'h1,p1 old = ',h1,p1
+!  endif
 
   hmono = mo_bi_ortho_tc_one_e(p1,h1) * phase
 
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
index a69f5d2e..df930136 100644
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -1,6 +1,6 @@
 
 
-subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, htot)
+subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, hthree, htot)
   BEGIN_DOC
   ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
   !!
@@ -14,7 +14,7 @@ subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe
   implicit none
   integer,           intent(in) :: Nint
   integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
-  double precision, intent(out) :: hmono, htwoe, htot
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
   integer                       :: occ(Nint*bit_kind_size,2)
   integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
   integer                       :: degree,exc(0:2,2,2)
@@ -27,109 +27,21 @@ subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe
   other_spin(1) = 2
   other_spin(2) = 1
 
-  hmono = 0.d0
-  htwoe= 0.d0
-  htot = 0.d0
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
   call get_excitation_degree(key_i, key_j, degree, Nint)
   if(degree.ne.1)then
    return
   endif
-   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
 
   call get_single_excitation(key_i, key_j, exc, phase, Nint)
   call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
   call get_single_excitation_from_fock_tc(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,hthree,htot)
 end
 
- BEGIN_PROVIDER [double precision, tc_2e_3idx_coulomb_integrals, (mo_num,mo_num, mo_num)]
-&BEGIN_PROVIDER [double precision, tc_2e_3idx_exchange_integrals,(mo_num,mo_num, mo_num)]
- implicit none
- BEGIN_DOC
- ! tc_2e_3idx_coulomb_integrals(j,k,i)  = <jk|ji> 
- !
- ! tc_2e_3idx_exchange_integrals(j,k,i) = <kj|ji> 
- END_DOC
- integer :: i,j,k,l
- double precision :: get_two_e_integral
- double precision :: integral
-
- do k = 1, mo_num
-  do i = 1, mo_num
-   do j = 1, mo_num
-     tc_2e_3idx_coulomb_integrals(j, k,i) = mo_bi_ortho_tc_two_e(j ,k ,j ,i ) 
-     tc_2e_3idx_exchange_integrals(j,k,i) = mo_bi_ortho_tc_two_e(k ,j ,j ,i ) 
-   enddo
-  enddo
- enddo
-
-END_PROVIDER
-
-
-BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
- implicit none
- BEGIN_DOC
-! Closed-shell part of the Fock operator for the TC operator
- END_DOC
- integer :: h0,p0,h,p,k0,k,i
- integer :: n_occ_ab(2)
- integer :: occ(N_int*bit_kind_size,2)
- integer :: n_occ_ab_virt(2)
- integer :: occ_virt(N_int*bit_kind_size,2)
- integer(bit_kind) :: key_test(N_int)
- integer(bit_kind) :: key_virt(N_int,2)
-
- call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
- do i = 1, N_int
-  key_virt(i,1) = full_ijkl_bitmask(i)
-  key_virt(i,2) = full_ijkl_bitmask(i)
-  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
-  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
- enddo
- call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
- ! docc ---> virt single excitations
- do h0 = 1,  n_occ_ab(1)
-  h=occ(h0,1)
-  do p0 = 1, n_occ_ab_virt(1)
-   p = occ_virt(p0,1)
-   accu = 0.d0
-   do k0 = 1, n_occ_ab(1)
-    k = occ(k0,1)
-    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
-   enddo
-   fock_op_2_e_tc_closed_shell(p,h) = accu 
-  enddo
- enddo
-
- ! virt ---> virt single excitations
- do h0 = 1,  n_occ_ab_virt(1)
-  h=occ_virt(h0,1)
-  do p0 = 1, n_occ_ab_virt(1)
-   p = occ_virt(p0,1)
-   accu = 0.d0
-   do k0 = 1, n_occ_ab(1)
-    k = occ(k0,1)
-    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
-   enddo
-   fock_op_2_e_tc_closed_shell(p,h) = accu 
-  enddo
- enddo
-
- ! docc ---> docc single excitations
- do h0 = 1,  n_occ_ab(1)
-  h=occ(h0,1)
-  do p0 = 1, n_occ_ab(1)
-   p = occ(p0,1)
-   accu = 0.d0
-   do k0 = 1, n_occ_ab(1)
-    k = occ(k0,1)
-    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - array_exchange(k,p,h)
-   enddo
-   fock_op_2_e_tc_closed_shell(p,h) = accu 
-  enddo
- enddo
-
-END_PROVIDER
-
 
 subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,htwoe,hthree,htot)
  use bitmasks
@@ -200,3 +112,97 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h
 
 end
 
+
+BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+! Closed-shell part of the Fock operator for the TC operator
+!
+! fock_op_2_e_tc_closed_shell(p,h) = sum_k [ 2 * tc_2e_3idx_coulomb_integrals(k,p,h)
+!                                              - tc_2e_3idx_exchange_integrals(k,p,h) ]
+!
+! k runs over the orbitals listed for spin index 1 of ref_closed_shell_bitmask (docc).
+!
+! p and h each run over docc and virt, where virt is the spin-index-1 list of
+! xor(full_ijkl_bitmask, ref_closed_shell_bitmask).
+! Entries (p,h) outside these lists keep the initial value -1000.d0.
+ END_DOC
+ integer :: h0,p0,h,p,k0,k,i
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_virt(N_int,2)
+ double precision :: accu
+
+ ! Initial value kept by every (p,h) not visited below (presumably a debug sentinel)
+ fock_op_2_e_tc_closed_shell = -1000.d0
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = xor(full_ijkl_bitmask(i),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(full_ijkl_bitmask(i),ref_closed_shell_bitmask(i,2))
+ enddo
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+
+ ! docc ---> virt single excitations
+ do h0 = 1, n_occ_ab(1)
+  h = occ(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu
+  enddo
+ enddo
+
+ ! virt ---> docc single excitations
+ do h0 = 1, n_occ_ab_virt(1)
+  h = occ_virt(h0,1)
+  do p0 = 1, n_occ_ab(1)
+   p = occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu
+  enddo
+ enddo
+
+ ! virt ---> virt single excitations
+ do h0 = 1, n_occ_ab_virt(1)
+  h = occ_virt(h0,1)
+  do p0 = 1, n_occ_ab_virt(1)
+   p = occ_virt(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu
+  enddo
+ enddo
+
+ ! docc ---> docc single excitations
+ do h0 = 1, n_occ_ab(1)
+  h = occ(h0,1)
+  do p0 = 1, n_occ_ab(1)
+   p = occ(p0,1)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu
+  enddo
+ enddo
+
+! do i = 1, mo_num
+!  write(*,'(100(F10.5,X))')fock_op_2_e_tc_closed_shell(:,i)
+! enddo
+
+END_PROVIDER
+
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 094c9bbc..069a1d53 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -19,10 +19,28 @@ subroutine test_slater_tc_opt
  integer :: i,j
  double precision :: hmono, htwoe, htot, hthree 
  double precision :: hnewmono, hnewtwoe, hnewthnewree, hnewtot
+ double precision :: accu ,i_count
+ accu = 0.d0
+ i_count = 0.d0
  do i = 1, N_det
+! do i = 14,14
   call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
   call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
-  print*,htot,hnewtot,dabs(htot-hnewtot) 
+  do j = 1, N_det
+!  do j = 1, 1
+   if(i==j)cycle
+   call single_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, htot)
+   call single_htilde_mu_mat_fock_bi_ortho (N_int, psi_det(1,1,j), psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
+   if(dabs(htot).gt.1.d-10)then
+!    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
+     print*,j,i
+     i_count += 1.D0
+     print*,htot,hnewtot,dabs(htot-hnewtot) 
+     accu += dabs(htot-hnewtot) 
+!    endif
+   endif
+  enddo
  enddo
+ print*,'accu = ',accu/i_count
 
 end

From f0178d09a2572990111211fbe4b894bde9fa05ee Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 19 Jan 2023 22:34:11 +0100
Subject: [PATCH 43/68] diagonal matrix elements work with 3-e a la fock

---
 src/bi_ort_ints/three_body_ijm.irp.f   |  2 +-
 src/tc_bi_ortho/slater_tc_3e.irp.f     | 13 -----
 src/tc_bi_ortho/slater_tc_opt.irp.f    | 78 ++++++++++++++++++++++++--
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 18 ++++--
 4 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/src/bi_ort_ints/three_body_ijm.irp.f b/src/bi_ort_ints/three_body_ijm.irp.f
index 0e42264b..4d21cb93 100644
--- a/src/bi_ort_ints/three_body_ijm.irp.f
+++ b/src/bi_ort_ints/three_body_ijm.irp.f
@@ -60,7 +60,7 @@ BEGIN_PROVIDER [ double precision, three_e_3_idx_cycle_1_bi_ort, (mo_num, mo_num
   !
   ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS for the first cyclic permutation 
   !
-  ! three_e_3_idx_direct_bi_ort(m,j,i) = <mji|-L|jim>
+  ! three_e_3_idx_cycle_1_bi_ort(m,j,i) = <mji|-L|jim>
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   !
diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f
index a56a432f..0d5f8542 100644
--- a/src/tc_bi_ortho/slater_tc_3e.irp.f
+++ b/src/tc_bi_ortho/slater_tc_3e.irp.f
@@ -49,8 +49,6 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
 
   if(Ne(1)+Ne(2).ge.3)then
 !!  ! alpha/alpha/beta three-body
-  double precision :: accu 
-  accu = 0.d0
    do i = 1, Ne(1)
     ii = occ(i,1) 
     do j = i+1, Ne(1)
@@ -62,14 +60,11 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
       direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR 
       exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii) ! USES 3-IDX TENSOR 
       hthree += direct_int - exchange_int
-      accu += direct_int - exchange_int
      enddo
     enddo
    enddo
-   !print*,'aab = ',accu
   
    ! beta/beta/alpha three-body
-  accu = 0.d0
    do i = 1, Ne(2)
     ii = occ(i,2) 
     do j = i+1, Ne(2)
@@ -79,14 +74,11 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
       direct_int = three_e_3_idx_direct_bi_ort(mm,jj,ii) 
       exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii)
       hthree += direct_int - exchange_int
-      accu += direct_int - exchange_int
      enddo
     enddo
    enddo
-   !print*,'abb = ',accu
 
    ! alpha/alpha/alpha three-body
-   accu = 0.d0
    do i = 1, Ne(1)
     ii = occ(i,1) ! 1
     do j = i+1, Ne(1)
@@ -95,14 +87,11 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
       mm = occ(m,1) ! 3 
 !      ref =  sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR 
       hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS
-      accu += three_e_diag_parrallel_spin(mm,jj,ii) 
      enddo
     enddo
    enddo
-   !print*,'aaa = ',accu
 
    ! beta/beta/beta three-body
-   accu = 0.d0
    do i = 1, Ne(2)
     ii = occ(i,2) ! 1
     do j = i+1, Ne(2)
@@ -111,11 +100,9 @@ subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
       mm = occ(m,2) ! 3
 !      ref =  sym_3_e_int_from_6_idx_tensor(mm,jj,ii,mm,jj,ii) USES THE 6 IDX TENSOR 
       hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS
-      accu += three_e_diag_parrallel_spin(mm,jj,ii) 
      enddo
     enddo
    enddo
-   !print*,'bbb = ',accu
   endif
 
 end
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
index 50886fe3..4048f481 100644
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -55,6 +55,9 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree,
   enddo
 
   if (nexc(1)+nexc(2) == 0) then
+    hmono = ref_tc_energy_1e
+    htwoe = ref_tc_energy_2e
+    hthree= ref_tc_energy_3e
     htot = ref_tc_energy_tot
     return
   endif
@@ -75,7 +78,6 @@ subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree,
   hmono = ref_tc_energy_1e
   htwoe = ref_tc_energy_2e 
   hthree= ref_tc_energy_3e
-
   do ispin=1,2
     na = elec_num_tab(ispin)
     nb = elec_num_tab(iand(ispin,1)+1)
@@ -110,7 +112,9 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
 
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
-  integer                        :: k,l,i
+  integer                        :: k,l,i,jj,mm,j,m
+  double precision :: three_e_diag_parrallel_spin, direct_int, exchange_int
+  
 
   if (iorb < 1) then
     print *,  irp_here, ': iorb < 1'
@@ -151,6 +155,39 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
   do i=1,nb
     htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
   enddo
+
+  if(three_body_h_tc)then
+   !!!!! 3-e part 
+   !! same-spin/same-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = j+1, na
+     mm = occ(m,ispin)
+     hthree += three_e_diag_parrallel_spin(mm,jj,iorb)
+    enddo
+   enddo
+   !! same-spin/oposite-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = 1, nb
+     mm = occ(m,other_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     hthree += direct_int - exchange_int
+    enddo
+   enddo
+   !! oposite-spin/opposite-spin
+    do j = 1, nb
+     jj = occ(j,other_spin) 
+     do m = j+1, nb 
+      mm = occ(m,other_spin) 
+      direct_int = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      hthree += direct_int - exchange_int
+     enddo
+    enddo
+  endif
+
   na = na+1
 end
 
@@ -172,10 +209,11 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
   integer, intent(inout)         :: na, nb
   integer(bit_kind), intent(inout) :: key(Nint,2)
   double precision, intent(inout) :: hmono,htwoe,hthree
-
+  
+  double precision  :: direct_int, exchange_int, three_e_diag_parrallel_spin
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
-  integer                        :: k,l,i
+  integer                        :: k,l,i,jj,mm,j,m
   integer                        :: tmp(2)
 
   ASSERT (iorb > 0)
@@ -205,5 +243,37 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
     htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
   enddo
 
+  if(three_body_h_tc)then
+   !!!!! 3-e part 
+   !! same-spin/same-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = j+1, na
+     mm = occ(m,ispin)
+     hthree -= three_e_diag_parrallel_spin(mm,jj,iorb)
+    enddo
+   enddo
+   !! same-spin/oposite-spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    do m = 1, nb
+     mm = occ(m,other_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+     hthree -= (direct_int - exchange_int)
+    enddo
+   enddo
+   !! oposite-spin/opposite-spin
+    do j = 1, nb
+     jj = occ(j,other_spin) 
+     do m = j+1, nb 
+      mm = occ(m,other_spin) 
+      direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      exchange_int = three_e_3_idx_exch23_bi_ort(mm,jj,iorb) ! USES 3-IDX TENSOR 
+      hthree -= (direct_int - exchange_int)
+     enddo
+    enddo
+  endif
+
 end
 
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 069a1d53..dda4bd00 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -19,28 +19,36 @@ subroutine test_slater_tc_opt
  integer :: i,j
  double precision :: hmono, htwoe, htot, hthree 
  double precision :: hnewmono, hnewtwoe, hnewthnewree, hnewtot
- double precision :: accu ,i_count
+ double precision :: accu_d ,i_count, accu
  accu = 0.d0
+ accu_d = 0.d0
  i_count = 0.d0
  do i = 1, N_det
 ! do i = 14,14
   call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
+  call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
   call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
+!  print*,hthree,hnewthnewree
+!  print*,htot,hnewtot,dabs(hnewtot-htot)
+  accu_d += dabs(htot-hnewtot) 
+!  if(dabs(htot-hnewtot).gt.1.d-8)then
+   print*,i
+   print*,htot,hnewtot,dabs(htot-hnewtot)
+!  endif
   do j = 1, N_det
-!  do j = 1, 1
    if(i==j)cycle
    call single_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, htot)
    call single_htilde_mu_mat_fock_bi_ortho (N_int, psi_det(1,1,j), psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
    if(dabs(htot).gt.1.d-10)then
-!    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
+    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
      print*,j,i
      i_count += 1.D0
      print*,htot,hnewtot,dabs(htot-hnewtot) 
      accu += dabs(htot-hnewtot) 
-!    endif
+    endif
    endif
   enddo
  enddo
- print*,'accu = ',accu/i_count
+ print*,'accu_d = ',accu_d/N_det
 
 end

From 0011aa2bc2817ad5f519e35c61a2ec667e35223f Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 11:19:38 +0100
Subject: [PATCH 44/68] TC single excitations H matrix elements work with Fock
 matrix

---
 src/bi_ort_ints/three_body_ijmk.irp.f      |   4 +-
 src/tc_bi_ortho/slater_tc_opt_single.irp.f | 234 +++++++++++++++++++++
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f     |  39 ++--
 3 files changed, 262 insertions(+), 15 deletions(-)

diff --git a/src/bi_ort_ints/three_body_ijmk.irp.f b/src/bi_ort_ints/three_body_ijmk.irp.f
index 0d5016ce..853972f7 100644
--- a/src/bi_ort_ints/three_body_ijmk.irp.f
+++ b/src/bi_ort_ints/three_body_ijmk.irp.f
@@ -195,7 +195,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch13_bi_ort, (mo_num, mo_num,
   !
   ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
   !
-  ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = <mjk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  ! three_e_4_idx_exch13_bi_ort(m,j,k,i) = <mjk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   END_DOC
@@ -241,7 +241,7 @@ BEGIN_PROVIDER [ double precision, three_e_4_idx_exch12_bi_ort, (mo_num, mo_num,
   ! 
   ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
   !
-  ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = <mjk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  ! three_e_4_idx_exch12_bi_ort(m,j,k,i) = <mjk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   ! 
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
index df930136..cb9306aa 100644
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -106,12 +106,246 @@ subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,h
   htwoe -= buffer_x(i)
  enddo
  hthree = 0.d0
+ if (three_body_h_tc)then
+  call three_comp_fock_elem(key_i,h,p,spin,hthree)
+ endif
+
+
  htwoe = htwoe * phase
  hmono = hmono * phase
+ hthree = hthree * phase
  htot  = htwoe + hmono + hthree
 
 end
 
+subroutine three_comp_fock_elem(key_i,h_fock,p_fock,ispin_fock,hthree)
+ implicit none
+ integer,intent(in) :: h_fock,p_fock,ispin_fock
+ integer(bit_kind), intent(in) :: key_i(N_int,2)
+ double precision, intent(out) :: hthree
+ integer :: nexc(2),i,ispin,na,nb
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2) ! NOTE(review): declared but never used in this routine
+ integer(bit_kind)              :: det_tmp(N_int,2)
+ ! Three-electron part of the element a^dagger_{p_fock} a_{h_fock} (spin ispin_fock) on top of key_i: start from
+ ! the ref_bitmask Fock element, then correct it for each particle/hole separating key_i from ref_bitmask.
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the ROHF determinant
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1)) ! bits where key_i and ref_bitmask differ
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1)) ! differing bits that are set in key_i
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1)) ! differing bits that are set in ref_bitmask
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1)) ! running count of holes, first spin
+    nexc(2)       = nexc(2) + popcnt(hole(i,2)) ! running count of holes, second spin
+  enddo
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  !! Initialize the matrix element with the reference ROHF Slater determinant Fock element
+  if(ispin_fock==1)then
+   hthree = fock_a_tot_3e_bi_orth(p_fock,h_fock) 
+  else 
+   hthree = fock_b_tot_3e_bi_orth(p_fock,h_fock) 
+  endif
+  det_tmp = ref_bitmask ! working copy, modified in place by the two operators called below
+  do ispin=1,2
+    na = elec_num_tab(ispin) ! presumably the number of electrons of spin ispin - TODO confirm
+    nb = elec_num_tab(iand(ispin,1)+1) ! iand(ispin,1)+1 maps 1 -> 2 and 2 -> 1, i.e. the other spin
+    do i=1,nexc(ispin) ! i-th particle is added (na -> na+1), then the i-th hole is removed (na -> na-1)
+      !DIR$ FORCEINLINE
+      call fock_ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+      !DIR$ FORCEINLINE
+      call fock_a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+    enddo
+  enddo
+end
+
+subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Contribution to the three-electron part of the Fock operator
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  !
+  ! coming from the ADDITION of an electron of spin ispin in orbital iorb on top of the determinant 'key'.
+  !
+  ! In output, key is changed by the ADDITION of that electron, hthree is INCREMENTED and na becomes na+1.
+  !
+  ! na (resp. nb) is the number of entries of occ(:,ispin) (resp. occ(:,other_spin)) that are summed over.
+  END_DOC
+  integer, intent(in)              :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)           :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout)  :: hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,j
+  double precision :: three_e_single_parrallel_spin, direct_int, exchange_int
+  ! NOTE(review): the local i is declared but not used; nb is intent(inout) but never modified here.
+  ! Abort when iorb is outside 1..mo_num
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+  ! occ was listed BEFORE the bit of iorb is set: the sums below do not include the added electron
+  k = shiftr(iorb-1,bit_kind_shift)+1 ! index of the bit_kind word that holds orbital iorb
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1 ! position of the bit of iorb inside that word
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1 ! maps 1 -> 2 and 2 -> 1
+
+
+  !! spin of other electrons == ispin 
+  if(ispin == ispin_fock)then
+   !! in what follows :: jj == other electrons in the determinant 
+   !!                 :: iorb == electron that has been added of spin ispin
+   !!                 :: p_fock, h_fock == hole particle of spin ispin_fock
+   !! jj = ispin = ispin_fock >> pure parallel spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    hthree += three_e_single_parrallel_spin(jj,iorb,p_fock,h_fock)
+   enddo
+   !! spin of jj == other spin than ispin AND ispin_fock
+   !! exchange between the iorb and (h_fock, p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+  else !! ispin NE to ispin_fock
+   !! jj = ispin BUT NON EQUAL TO ispin_fock 
+   !! exchange between the jj and iorb
+   do j = 1, na
+    jj = occ(j,ispin)
+    direct_int   = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch23_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+   !! jj = other_spin than ispin BUT jj == ispin_fock
+   !! exchange between jj and (h_fock,p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch13_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree += direct_int - exchange_int
+   enddo
+  endif
+  ! the caller's counter now includes the electron that was just added
+  na = na+1
+end
+
+subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Contribution to the three-electron part of the Fock operator
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  !
+  ! coming from the REMOVAL of an electron of spin ispin in orbital iorb from the determinant 'key'.
+  !
+  ! In output, key is changed by the REMOVAL of that electron and na becomes na-1 (nb is not modified).
+  !
+  ! The summed terms are SUBTRACTED from hthree (every update below is a -=).
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hthree
+  ! NOTE(review): the locals i, mm and m declared below are not used in this routine.
+  double precision  :: direct_int, exchange_int, three_e_single_parrallel_spin
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  k = shiftr(iorb-1,bit_kind_shift)+1 ! index of the bit_kind word that holds orbital iorb
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1 ! position of the bit of iorb inside that word
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1 ! maps 1 -> 2 and 2 -> 1
+  ! the bit is cleared BEFORE listing occ and na is decremented first: the sums below exclude iorb
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+  !! spin of other electrons == ispin 
+  if(ispin == ispin_fock)then
+   !! in what follows :: jj == other electrons in the determinant 
+   !!                 :: iorb == electron that has been removed of spin ispin
+   !!                 :: p_fock, h_fock == hole particle of spin ispin_fock
+   !! jj = ispin = ispin_fock >> pure parallel spin
+   do j = 1, na
+    jj = occ(j,ispin)
+    hthree -= three_e_single_parrallel_spin(jj,iorb,p_fock,h_fock)
+   enddo
+   !! spin of jj == other spin than ispin AND ispin_fock
+   !! exchange between the iorb and (h_fock, p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch12_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+  else !! ispin NE to ispin_fock
+   !! jj = ispin BUT NON EQUAL TO ispin_fock 
+   !! exchange between the jj and iorb
+   do j = 1, na
+    jj = occ(j,ispin)
+    direct_int   = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch23_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+   !! jj = other_spin than ispin BUT jj == ispin_fock
+   !! exchange between jj and (h_fock,p_fock)
+   do j = 1, nb
+    jj = occ(j,other_spin) 
+    direct_int = three_e_4_idx_direct_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    exchange_int = three_e_4_idx_exch13_bi_ort(jj,iorb,p_fock,h_fock) ! USES 4-IDX TENSOR 
+    hthree -= direct_int - exchange_int
+   enddo
+  endif
+
+end
+
 
 BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
  implicit none
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index dda4bd00..f48984ea 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -18,37 +18,50 @@ subroutine test_slater_tc_opt
  implicit none
  integer :: i,j
  double precision :: hmono, htwoe, htot, hthree 
- double precision :: hnewmono, hnewtwoe, hnewthnewree, hnewtot
+ double precision :: hnewmono, hnewtwoe, hnewthree, hnewtot
  double precision :: accu_d ,i_count, accu
  accu = 0.d0
  accu_d = 0.d0
  i_count = 0.d0
  do i = 1, N_det
-! do i = 14,14
+! do i = 1,1
   call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
   call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-  call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
-!  print*,hthree,hnewthnewree
+  call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthree, hnewtot)
+!  print*,hthree,hnewthree
 !  print*,htot,hnewtot,dabs(hnewtot-htot)
   accu_d += dabs(htot-hnewtot) 
-!  if(dabs(htot-hnewtot).gt.1.d-8)then
+  if(dabs(htot-hnewtot).gt.1.d-8)then
    print*,i
    print*,htot,hnewtot,dabs(htot-hnewtot)
-!  endif
-  do j = 1, N_det
+  endif
+!  do j = 319,319
+  do j = 1,N_det
    if(i==j)cycle
-   call single_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,j), psi_det(1,1,i), hmono, htwoe, htot)
-   call single_htilde_mu_mat_fock_bi_ortho (N_int, psi_det(1,1,j), psi_det(1,1,i), hnewmono, hnewtwoe, hnewthnewree, hnewtot)
-   if(dabs(htot).gt.1.d-10)then
-    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
-     print*,j,i
+   integer :: degree 
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree .ne. 1)cycle
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call single_htilde_mu_mat_fock_bi_ortho (N_int, psi_det(1,1,j), psi_det(1,1,i), hnewmono, hnewtwoe, hnewthree, hnewtot)
+!   print*,'j,i',j,i
+!   print*,htot,hnewtot,dabs(htot-hnewtot) 
+!   print*,hthree,hnewthree,dabs(hthree-hnewthree) 
+   if(dabs(hthree).gt.1.d-15)then
+!    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
      i_count += 1.D0
-     print*,htot,hnewtot,dabs(htot-hnewtot) 
      accu += dabs(htot-hnewtot) 
+    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
+     print*,j,i
+     call debug_det(psi_det(1,1,i),N_int)
+     call debug_det(psi_det(1,1,j),N_int)
+!     print*,htot,hnewtot,dabs(htot-hnewtot) 
+     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
+     stop
     endif
    endif
   enddo
  enddo
  print*,'accu_d = ',accu_d/N_det
+ print*,'accu   = ',accu/i_count
 
 end

From 7a144bc1a2ad7cc26dc90310cfd996ea3d464288 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 11:20:26 +0100
Subject: [PATCH 45/68] renamed a file

---
 src/tc_bi_ortho/{slater_tc_opt.irp.f => slater_tc_opt_diag.irp.f} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/tc_bi_ortho/{slater_tc_opt.irp.f => slater_tc_opt_diag.irp.f} (100%)

diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
similarity index 100%
rename from src/tc_bi_ortho/slater_tc_opt.irp.f
rename to src/tc_bi_ortho/slater_tc_opt_diag.irp.f

From 721e0963b9e6cfe6e58e4e7314bd0d8d1230d672 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 11:31:28 +0100
Subject: [PATCH 46/68] working on TC Slater matrix elements

---
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index f48984ea..8afb3b25 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -40,22 +40,21 @@ subroutine test_slater_tc_opt
    if(i==j)cycle
    integer :: degree 
    call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
-   if(degree .ne. 1)cycle
+!   if(degree .ne. 1)cycle
    call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-   call single_htilde_mu_mat_fock_bi_ortho (N_int, psi_det(1,1,j), psi_det(1,1,i), hnewmono, hnewtwoe, hnewthree, hnewtot)
-!   print*,'j,i',j,i
-!   print*,htot,hnewtot,dabs(htot-hnewtot) 
-!   print*,hthree,hnewthree,dabs(hthree-hnewthree) 
-   if(dabs(hthree).gt.1.d-15)then
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
+!   if(dabs(hthree).gt.1.d-15)then
+   if(dabs(htot).gt.1.d-15)then
 !    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
      i_count += 1.D0
      accu += dabs(htot-hnewtot) 
-    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
+!    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
+    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
      print*,j,i
      call debug_det(psi_det(1,1,i),N_int)
      call debug_det(psi_det(1,1,j),N_int)
-!     print*,htot,hnewtot,dabs(htot-hnewtot) 
-     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
+     print*,htot,hnewtot,dabs(htot-hnewtot) 
+!     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
      stop
     endif
    endif

From f1137bc883fa401d3941dcde119638c78d8bd76a Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 14:57:21 +0100
Subject: [PATCH 47/68] beginning to optimize the double excitations for TC

---
 src/bi_ort_ints/three_body_ijmkl.irp.f     |  6 +--
 src/tc_bi_ortho/slater_tc_opt_double.irp.f | 57 ++++++++++++++++++++++
 src/tc_bi_ortho/test_normal_order.irp.f    | 52 ++++++++++----------
 3 files changed, 87 insertions(+), 28 deletions(-)
 create mode 100644 src/tc_bi_ortho/slater_tc_opt_double.irp.f

diff --git a/src/bi_ort_ints/three_body_ijmkl.irp.f b/src/bi_ort_ints/three_body_ijmkl.irp.f
index 6287c5a3..bd5c4977 100644
--- a/src/bi_ort_ints/three_body_ijmkl.irp.f
+++ b/src/bi_ort_ints/three_body_ijmkl.irp.f
@@ -7,7 +7,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_direct_bi_ort, (mo_num, mo_num,
   !
   ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
   !
-  ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = <mjk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  ! three_e_5_idx_direct_bi_ort(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   END_DOC
@@ -202,7 +202,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch13_bi_ort, (mo_num, mo_num,
   !
   ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
   !
-  ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  ! three_e_5_idx_exch13_bi_ort(m,l,j,k,i) = <mlk|-L|ijm> ::: notice that i is the RIGHT MO and k is the LEFT MO
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   !
@@ -251,7 +251,7 @@ BEGIN_PROVIDER [ double precision, three_e_5_idx_exch12_bi_ort, (mo_num, mo_num,
   !
   ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
   !
-  ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = <mlk|-L|jmi> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  ! three_e_5_idx_exch12_bi_ort(m,l,j,k,i) = <mlk|-L|mij> ::: notice that i is the RIGHT MO and k is the LEFT MO
   !
   ! notice the -1 sign: in this way three_e_3_idx_direct_bi_ort can be directly used to compute Slater rules with a + sign
   !
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
new file mode 100644
index 00000000..9cff8ff3
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -0,0 +1,57 @@
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = effective two-electron term for the alpha/beta double excitation
+!
+! a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha, built by give_contrib_for_abab from the
+! occupied orbitals of ref_bitmask.
+!
+! Only indices belonging to list_act are filled; every other entry keeps the value 0.
+ END_DOC
+ integer                        :: ih1, ih2, ip1, ip2, h1, h2, p1, p2
+ integer                        :: n_elec(2)
+ integer,           allocatable :: occ_ref(:,:)
+
+ allocate( occ_ref(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask, occ_ref, n_elec, N_int)
+ eff_2_e_from_3_e_ab(:,:,:,:) = 0.d0
+ do ih1 = 1, n_act_orb          ! h1 : alpha
+   h1 = list_act(ih1)
+   do ih2 = 1, n_act_orb        ! h2 : beta
+     h2 = list_act(ih2)
+     do ip1 = 1, n_act_orb      ! p1 : alpha
+       p1 = list_act(ip1)
+       do ip2 = 1, n_act_orb    ! p2 : beta
+         p2 = list_act(ip2)
+         call give_contrib_for_abab(h1, h2, p1, p2, occ_ref, n_elec, eff_2_e_from_3_e_ab(p2,p1,h2,h1))
+       enddo
+     enddo
+   enddo
+ enddo
+END_PROVIDER 
+
+subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == alpha 
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha 
+   m = occ(m,1)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and m
+   exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+
+ do mm = 1, Ne(2) !! beta
+   m = occ(m,2)
+   direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+   ! exchange between (h2,p2) and m
+   exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f
index 8bdc57ee..f3641049 100644
--- a/src/tc_bi_ortho/test_normal_order.irp.f
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@@ -10,6 +10,7 @@ program test_normal_order
   read_wf = .True.
   touch read_wf
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call provide_all_three_ints_bi_ortho
   call test
 end
 
@@ -28,7 +29,7 @@ subroutine test
  s2 = 2
  accu = 0.d0
  do h1 = 1, elec_beta_num
-  do p1 = elec_beta_num+1, mo_num
+  do p1 = elec_alpha_num+1, mo_num
    do h2 = 1, elec_beta_num
     do p2 = elec_beta_num+1, mo_num
      det_i = ref_bitmask
@@ -38,36 +39,37 @@ subroutine test
      call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
      call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
      hthree *= phase
-     normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
+!     normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
      accu += dabs(hthree-normal)
     enddo
    enddo
   enddo
  enddo
- print*,'accu opposite spin = ',accu
+print*,'accu opposite spin = ',accu
 
- s1 = 2 
- s2 = 2
- accu = 0.d0
- do h1 = 1, elec_beta_num
-  do p1 = elec_beta_num+1, mo_num
-   do h2 = h1+1, elec_beta_num
-    do p2 = elec_beta_num+1, mo_num
-     det_i = ref_bitmask
-     call do_single_excitation(det_i,h1,p1,s1,i_ok)
-     call do_single_excitation(det_i,h2,p2,s2,i_ok)
-     if(i_ok.ne.1)cycle
-     call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
-     call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
-     call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
-     hthree *= phase
-     normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
-     accu += dabs(hthree-normal)
-    enddo
-   enddo
-  enddo
- enddo
- print*,'accu same spin = ',accu
+!s1 = 2 
+!s2 = 2
+!accu = 0.d0
+!do h1 = 1, elec_beta_num
+! do p1 = elec_beta_num+1, mo_num
+!  do h2 = h1+1, elec_beta_num
+!   do p2 = elec_beta_num+1, mo_num
+!    det_i = ref_bitmask
+!    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+!    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+!    if(i_ok.ne.1)cycle
+!    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+!    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+!    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+!    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+!    accu += dabs(hthree-normal)
+!   enddo
+!  enddo
+! enddo
+!enddo
+!print*,'accu same spin = ',accu
 end
 
 

From ac2ebda9ce4f3efb672dbcd9b989f82f9719dc9a Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 15:49:39 +0100
Subject: [PATCH 48/68] beginning to work on double exc with optimization

---
 src/tc_bi_ortho/slater_tc_3e.irp.f         |  24 ++--
 src/tc_bi_ortho/slater_tc_opt.irp.f        |  50 ++++++++
 src/tc_bi_ortho/slater_tc_opt_double.irp.f | 141 ++++++++++++++++++++-
 src/tc_bi_ortho/test_normal_order.irp.f    |  98 ++++++++++----
 4 files changed, 275 insertions(+), 38 deletions(-)
 create mode 100644 src/tc_bi_ortho/slater_tc_opt.irp.f

diff --git a/src/tc_bi_ortho/slater_tc_3e.irp.f b/src/tc_bi_ortho/slater_tc_3e.irp.f
index 0d5f8542..9740ee2f 100644
--- a/src/tc_bi_ortho/slater_tc_3e.irp.f
+++ b/src/tc_bi_ortho/slater_tc_3e.irp.f
@@ -256,20 +256,16 @@ subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
     if(Ne(1)+Ne(2).ge.3)then
      if(s1==s2)then ! same spin excitation 
       ispin = other_spin(s1)
-!      print*,'htilde ij'
-     do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1)
-      mm = occ(m,ispin)
-!!    direct_int = three_body_ints_bi_ort(mm,p2,p1,mm,h2,h1)
-!!    exchange_int = three_body_ints_bi_ort(mm,p2,p1,mm,h1,h2)
-      direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
-      exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
-!      print*,direct_int,exchange_int
-      hthree += direct_int - exchange_int
-     enddo
-     do m = 1, Ne(s1) ! pure contribution from s1 
-      mm = occ(m,s1)
-      hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1)
-     enddo 
+      do m = 1, Ne(ispin) ! direct(other_spin) - exchange(s1)
+       mm = occ(m,ispin)
+       direct_int = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
+       exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
+       hthree += direct_int - exchange_int
+      enddo
+      do m = 1, Ne(s1) ! pure contribution from s1 
+       mm = occ(m,s1)
+       hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1)
+      enddo 
      else ! different spin excitation 
        do m = 1, Ne(s1)
         mm = occ(m,s1) ! 
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
new file mode 100644
index 00000000..c334b274
--- /dev/null
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -0,0 +1,50 @@
+subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  !
+  ! Matrix element <key_j | H_tilde | key_i>, with |key_j> developed on the LEFT basis and |key_i> on the RIGHT basis.
+  !
+  ! hmono, htwoe and hthree are the one-, two- and three-electron parts, htot is their sum
+  ! (plus nuclear_repulsion when the excitation degree between key_i and key_j is 0).
+  !
+  ! WARNING :: Non hermitian !! Everything is returned as zero beyond double excitations.
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in)  :: Nint
+  integer(bit_kind), intent(in)  :: key_j(Nint,2), key_i(Nint,2)
+  double precision,  intent(out) :: hmono, htwoe, hthree, htot
+  integer                        :: n_exc
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+
+  call get_excitation_degree(key_i, key_j, n_exc, Nint)
+  if (n_exc > 2) return
+
+  select case (n_exc)
+  case (0)
+    call diag_htilde_mu_mat_fock_bi_ortho(Nint, key_i, hmono, htwoe, hthree, htot)
+  case (1)
+    call single_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+  case (2)
+    call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+    if (three_body_h_tc) then
+      if (.not. double_normal_ord) then
+        call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+      endif
+    endif
+  end select
+
+  ! the htot filled by the helpers is overwritten: it is rebuilt here from the three parts
+  htot = hmono + htwoe + hthree
+  if (n_exc == 0) htot = htot + nuclear_repulsion
+
+end
+
+! ---
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index 9cff8ff3..ef319c47 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -32,6 +32,11 @@ END_PROVIDER
 
 subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
  implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals is in (occ, Ne)
+ END_DOC
  integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
  double precision, intent(out) :: contrib
  integer :: mm,m 
@@ -40,7 +45,7 @@ subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
  !! h2,p2 == beta
  contrib = 0.d0
  do mm = 1, Ne(1) !! alpha 
-   m = occ(m,1)
+   m = occ(mm,1)
    direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
    ! exchange between (h1,p1) and m
    exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1)
@@ -48,10 +53,142 @@ subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
  enddo
 
  do mm = 1, Ne(2) !! beta
-   m = occ(m,2)
+   m = occ(mm,2)
    direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1) 
    ! exchange between (h2,p2) and m
    exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1)
    contrib += direct_int - exchange_int
  enddo
 end
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_aa(p2,p1,h2,h1) = effective two-electron term for the alpha/alpha double excitation
+!
+! a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha, built by give_contrib_for_aaaa from the
+! occupied orbitals of ref_bitmask.
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+!
+! Only positions ih2>ih1 and ip2>ip1 of list_act are computed, any other entry keeps the value 100000000.d0
+ END_DOC
+ integer                        :: ih1, ih2, ip1, ip2, h1, h2, p1, p2
+ integer                        :: n_elec(2)
+ integer,           allocatable :: occ_ref(:,:)
+
+ allocate( occ_ref(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask, occ_ref, n_elec, N_int)
+ eff_2_e_from_3_e_aa(:,:,:,:) = 100000000.d0
+ do ih1 = 1, n_act_orb              ! h1 : alpha
+   h1 = list_act(ih1)
+   do ih2 = ih1+1, n_act_orb        ! h2 : alpha
+     h2 = list_act(ih2)
+     do ip1 = 1, n_act_orb          ! p1 : alpha
+       p1 = list_act(ip1)
+       do ip2 = ip1+1, n_act_orb    ! p2 : alpha
+         p2 = list_act(ip2)
+         call give_contrib_for_aaaa(h1, h2, p1, p2, occ_ref, n_elec, eff_2_e_from_3_e_aa(p2,p1,h2,h1))
+       enddo
+     enddo
+   enddo
+ enddo
+END_PROVIDER 
+
+subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha
+!
+! on top of a determinant whose occupied orbitals are listed in occ (Ne(1) alpha ones, Ne(2) beta ones)
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ double precision :: three_e_double_parrallel_spin
+ !! h1,p1 == alpha 
+ !! h2,p2 == alpha
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha ==> pure parallel spin contribution
+   m = occ(mm,1)
+   contrib += three_e_double_parrallel_spin(m,p2,h2,p1,h1)
+ enddo
+ do mm = 1, Ne(2) !! beta
+   m = occ(mm,2)
+   ! the tensors are addressed by the occupied ORBITAL m, not by its position mm in the list occ
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_bb(p2,p1,h2,h1) = effective two-electron term for the beta/beta double excitation
+!
+! a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta, built by give_contrib_for_bbbb from the
+! occupied orbitals of ref_bitmask.
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+!
+! Only positions ih2>ih1 and ip2>ip1 of list_act are computed, any other entry keeps the value 100000000.d0
+ END_DOC
+ integer                        :: ih1, ih2, ip1, ip2, h1, h2, p1, p2
+ integer                        :: n_elec(2)
+ integer,           allocatable :: occ_ref(:,:)
+
+ allocate( occ_ref(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask, occ_ref, n_elec, N_int)
+ eff_2_e_from_3_e_bb(:,:,:,:) = 100000000.d0
+ do ih1 = 1, n_act_orb              ! h1 : beta
+   h1 = list_act(ih1)
+   do ih2 = ih1+1, n_act_orb        ! h2 : beta
+     h2 = list_act(ih2)
+     do ip1 = 1, n_act_orb          ! p1 : beta
+       p1 = list_act(ip1)
+       do ip2 = ip1+1, n_act_orb    ! p2 : beta
+         p2 = list_act(ip2)
+         call give_contrib_for_bbbb(h1, h2, p1, p2, occ_ref, n_elec, eff_2_e_from_3_e_bb(p2,p1,h2,h1))
+       enddo
+     enddo
+   enddo
+ enddo
+END_PROVIDER 
+
+subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals are listed in occ (Ne(1) alpha ones, Ne(2) beta ones)
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ double precision :: three_e_double_parrallel_spin
+ !! h1,p1 == beta
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(2) !! beta ==> pure parallel spin contribution
+   m = occ(mm,2) ! beta occupations are in the second column of occ (the loop runs over Ne(2))
+   contrib += three_e_double_parrallel_spin(m,p2,h2,p1,h1)
+ enddo
+ do mm = 1, Ne(1) !! alpha
+   m = occ(mm,1)
+   ! the tensors are addressed by the occupied ORBITAL m, not by its position mm in the list occ
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f
index f3641049..46705f5f 100644
--- a/src/tc_bi_ortho/test_normal_order.irp.f
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@@ -39,7 +39,7 @@ subroutine test
      call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
      call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
      hthree *= phase
-!     normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
+!    !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
      normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
      accu += dabs(hthree-normal)
     enddo
@@ -48,28 +48,82 @@ subroutine test
  enddo
 print*,'accu opposite spin = ',accu
 
-!s1 = 2 
-!s2 = 2
-!accu = 0.d0
-!do h1 = 1, elec_beta_num
-! do p1 = elec_beta_num+1, mo_num
-!  do h2 = h1+1, elec_beta_num
-!   do p2 = elec_beta_num+1, mo_num
-!    det_i = ref_bitmask
-!    call do_single_excitation(det_i,h1,p1,s1,i_ok)
-!    call do_single_excitation(det_i,h2,p2,s2,i_ok)
-!    if(i_ok.ne.1)cycle
-!    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
-!    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
-!    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
-!    hthree *= phase
+!    p2=6
+!    p1=5
+!    h2=2
+!    h1=1
+
+s1 = 1 
+s2 = 1
+accu = 0.d0
+do h1 = 1, elec_alpha_num
+ do p1 = elec_alpha_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_alpha_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    integer :: hh1, pp1, hh2, pp2, ss1, ss2
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
 !    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
-!    accu += dabs(hthree-normal)
-!   enddo
-!  enddo
-! enddo
-!enddo
-!print*,'accu same spin = ',accu
+     normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1)
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin alpha = ',accu
+
+
+s1 = 2 
+s2 = 2
+accu = 0.d0
+do h1 = 1, elec_beta_num
+ do p1 = elec_beta_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_beta_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin beta  = ',accu
+
+
 end
 
 

From 9eba8d692d549e24d30e3236d432e800b7551249 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 16:33:37 +0100
Subject: [PATCH 49/68] optimized all matrix elements with three body terms

---
 src/tc_bi_ortho/slater_tc_opt.irp.f        |  12 +-
 src/tc_bi_ortho/slater_tc_opt_double.irp.f | 197 +++++++++++++++++++++
 src/tc_bi_ortho/test_normal_order.irp.f    |   4 +-
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f     |   3 +-
 4 files changed, 205 insertions(+), 11 deletions(-)

diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
index c334b274..8ab3388c 100644
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -28,19 +28,13 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree,
   if(degree.gt.2) return
 
   if(degree == 0)then
-    call diag_htilde_mu_mat_fock_bi_ortho(Nint, key_i, hmono, htwoe, hthree, htot)
+    call diag_htilde_mu_mat_fock_bi_ortho  (Nint, key_i, hmono, htwoe, hthree, htot)
   else if (degree == 1)then
-    call single_htilde_mu_mat_fock_bi_ortho (Nint,key_j, key_i , hmono, htwoe, hthree, htot)
+    call single_htilde_mu_mat_fock_bi_ortho(Nint,key_j, key_i , hmono, htwoe, hthree, htot)
   else if(degree == 2)then
-    call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
-    if(three_body_h_tc) then
-     if(.not.double_normal_ord) then
-       call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
-     endif
-    endif
+    call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
   endif
 
-  htot = hmono + htwoe + hthree
   if(degree==0) then
     htot += nuclear_repulsion
   endif
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index ef319c47..ca1d0eea 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -1,3 +1,200 @@
+
+subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for a double excitation: two-body part (htwoe) and, when
+  ! three_body_h_tc is set, three-body part (hthree); hmono is always 0 and htot = htwoe + hthree.
+  ! Everything is returned as 0 when key_i and key_j do not differ by a double excitation.
+  ! If double_normal_ord is set (and Ne(1) > 2), normal_two_body_bi_orth goes into htwoe and hthree stays 0.
+  ! WARNING: Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2)
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1.ne.s2)then
+   ! opposite spin two-body 
+   ! only the direct term is taken in this branch
+    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
+    if(three_body_h_tc)then
+     if(.not.double_normal_ord)then
+      call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+     elseif(double_normal_ord.and.Ne(1).gt.2)then
+      htwoe += normal_two_body_bi_orth(p2,h2,p1,h1) ! TODO(review): index order to be confirmed
+     endif
+    endif
+  else
+   ! same spin two-body 
+   ! direct terms 
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
+   ! exchange terms 
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
+   if(three_body_h_tc)then
+    if(.not.double_normal_ord)then
+     call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+    elseif(double_normal_ord.and.Ne(1).gt.2)then
+      htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2) ! TODO(review): index order to be confirmed
+      htwoe += normal_two_body_bi_orth(h1,p1,h2,p2) ! TODO(review): index order to be confirmed
+    endif
+   endif
+  endif
+  hthree *= phase
+  htwoe  *= phase
+  htot    =  htwoe + hthree
+
+end
+! three_comp_two_e_elem: three-body part hthree of the double excitation (h1 -> p1, spin s1), (h2 -> p2, spin s2)
+! applied on key_i. hthree starts from eff_2_e_from_3_e_{aa,bb,ab}(p2,p1,h2,h1) and is then corrected by adding the
+! integrals of the particles and subtracting those of the holes that key_i has with respect to ref_bitmask.
+subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+ implicit none
+ integer(bit_kind), intent(in) :: key_i(N_int,2) ! determinant compared with ref_bitmask
+ integer, intent(in) :: h1,h2,p1,p2,s1,s2 ! holes, particles and spins of the double excitation (spin 1 = alpha, 2 = beta)
+ double precision, intent(out) :: hthree ! resulting three-body contribution
+ integer :: nexc(2),i,ispin,na,nb ! nexc(s) = number of holes of spin s in key_i
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
+ integer(bit_kind)              :: det_tmp(N_int,2)
+ integer :: ipart, ihole
+ double precision :: direct_int, exchange_int, three_e_double_parrallel_spin
+
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the reference determinant ref_bitmask
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1))
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particles beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+  if(s1==s2.and.s1==1)then
+   !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double excitation
+   hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) 
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+    ! (key_i == ref_bitmask) the tabulated value is already exact; 
+    ! otherwise each particle of key_i adds its integrals and each hole removes its own 
+    !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+    ispin = 1 ! i==alpha ==> pure same spin terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ipart=occ_particle(i,ispin)
+     hthree += three_e_double_parrallel_spin(ipart,p2,h2,p1,h1)
+     ihole=occ_hole(i,ispin)
+     hthree -= three_e_double_parrallel_spin(ihole,p2,h2,p1,h1)
+    enddo
+    ispin = 2 ! i==beta ==> alpha/alpha/beta terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ! direct minus exchange between (h1,p1) and (h2,p2), for the i-th particle then the i-th hole
+     ipart=occ_particle(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+     ihole=occ_hole(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+     hthree -= direct_int - exchange_int
+    enddo
+   ! (the loops pair the i-th particle with the i-th hole: same count per spin, cf. the ASSERTs above)
+  elseif(s1==s2.and.s1==2)then 
+   !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double excitation
+   hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   ! (key_i == ref_bitmask) the tabulated value is already exact; 
+   ! otherwise each particle of key_i adds its integrals and each hole removes its own 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 2 ! i==beta  ==> pure same spin terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ipart=occ_particle(i,ispin)
+    hthree += three_e_double_parrallel_spin(ipart,p2,h2,p1,h1)
+    ihole=occ_hole(i,ispin)
+    hthree -= three_e_double_parrallel_spin(ihole,p2,h2,p1,h1)
+   enddo
+   ispin = 1 ! i==alpha==> beta/beta/alpha terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! direct minus exchange between (h1,p1) and (h2,p2), for the i-th particle then the i-th hole
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  else                         ! (h1,p1) == alpha/(h2,p2) == beta 
+   hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   ! (key_i == ref_bitmask) the tabulated value is already exact; 
+   ! otherwise each particle of key_i adds its integrals and each hole removes its own 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 1 ! i==alpha ==> alpha/beta/alpha terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h1,p1) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+   ispin = 2 ! i==beta  ==> alpha/beta/beta  terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h2,p2) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  endif
+end
+
+
 BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)]
  implicit none
  BEGIN_DOC
diff --git a/src/tc_bi_ortho/test_normal_order.irp.f b/src/tc_bi_ortho/test_normal_order.irp.f
index 46705f5f..118e481a 100644
--- a/src/tc_bi_ortho/test_normal_order.irp.f
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@@ -40,13 +40,15 @@ subroutine test
      call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
      hthree *= phase
 !    !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
-     normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+     call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal)
+!     normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
      accu += dabs(hthree-normal)
     enddo
    enddo
   enddo
  enddo
 print*,'accu opposite spin = ',accu
+stop
 
 !    p2=6
 !    p1=5
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 8afb3b25..551eced2 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -50,13 +50,14 @@ subroutine test_slater_tc_opt
      accu += dabs(htot-hnewtot) 
 !    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
     if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
-     print*,j,i
+     print*,j,i,degree
      call debug_det(psi_det(1,1,i),N_int)
      call debug_det(psi_det(1,1,j),N_int)
      print*,htot,hnewtot,dabs(htot-hnewtot) 
 !     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
      stop
     endif
+    print*,htot,hnewtot,dabs(htot-hnewtot) 
    endif
   enddo
  enddo

From a5ded6cd59b1c30695836cdca07ae17834972140 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 17:30:08 +0100
Subject: [PATCH 50/68] added providers for the totally symmetrized integrals

---
 src/tc_bi_ortho/slater_tc_opt_diag.irp.f      |   8 +-
 src/tc_bi_ortho/slater_tc_opt_double.irp.f    |  16 +-
 src/tc_bi_ortho/slater_tc_opt_single.irp.f    |   8 +-
 .../symmetrized_3_e_int_prov.irp.f            | 140 ++++++++++++++++++
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f        |   2 +-
 5 files changed, 156 insertions(+), 18 deletions(-)
 create mode 100644 src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f

diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
index 4048f481..c0b59969 100644
--- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
@@ -113,7 +113,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
   integer                        :: k,l,i,jj,mm,j,m
-  double precision :: three_e_diag_parrallel_spin, direct_int, exchange_int
+  double precision ::  direct_int, exchange_int
   
 
   if (iorb < 1) then
@@ -163,7 +163,7 @@ subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
     jj = occ(j,ispin)
     do m = j+1, na
      mm = occ(m,ispin)
-     hthree += three_e_diag_parrallel_spin(mm,jj,iorb)
+     hthree += three_e_diag_parrallel_spin_prov(mm,jj,iorb)
     enddo
    enddo
    !! same-spin/oposite-spin
@@ -210,7 +210,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
   integer(bit_kind), intent(inout) :: key(Nint,2)
   double precision, intent(inout) :: hmono,htwoe,hthree
   
-  double precision  :: direct_int, exchange_int, three_e_diag_parrallel_spin
+  double precision  :: direct_int, exchange_int
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
   integer                        :: k,l,i,jj,mm,j,m
@@ -250,7 +250,7 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
     jj = occ(j,ispin)
     do m = j+1, na
      mm = occ(m,ispin)
-     hthree -= three_e_diag_parrallel_spin(mm,jj,iorb)
+     hthree -= three_e_diag_parrallel_spin_prov(mm,jj,iorb)
     enddo
    enddo
    !! same-spin/oposite-spin
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index ca1d0eea..c16c673d 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -84,7 +84,7 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
  integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
  integer(bit_kind)              :: det_tmp(N_int,2)
  integer :: ipart, ihole
- double precision :: direct_int, exchange_int, three_e_double_parrallel_spin
+ double precision :: direct_int, exchange_int
 
   nexc(1) = 0
   nexc(2) = 0
@@ -118,9 +118,9 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
     ispin = 1 ! i==alpha ==> pure same spin terms
     do i = 1, nexc(ispin) ! number of couple of holes/particles 
      ipart=occ_particle(i,ispin)
-     hthree += three_e_double_parrallel_spin(ipart,p2,h2,p1,h1)
+     hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
      ihole=occ_hole(i,ispin)
-     hthree -= three_e_double_parrallel_spin(ihole,p2,h2,p1,h1)
+     hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
     enddo
     ispin = 2 ! i==beta ==> alpha/alpha/beta terms
     do i = 1, nexc(ispin) ! number of couple of holes/particles 
@@ -145,9 +145,9 @@ subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
    ispin = 2 ! i==beta  ==> pure same spin terms
    do i = 1, nexc(ispin) ! number of couple of holes/particles 
     ipart=occ_particle(i,ispin)
-    hthree += three_e_double_parrallel_spin(ipart,p2,h2,p1,h1)
+    hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
     ihole=occ_hole(i,ispin)
-    hthree -= three_e_double_parrallel_spin(ihole,p2,h2,p1,h1)
+    hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
    enddo
    ispin = 1 ! i==alpha==> beta/beta/alpha terms
    do i = 1, nexc(ispin) ! number of couple of holes/particles 
@@ -305,13 +305,12 @@ subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
  double precision, intent(out) :: contrib
  integer :: mm,m 
  double precision :: direct_int, exchange_int
- double precision :: three_e_double_parrallel_spin
  !! h1,p1 == alpha 
  !! h2,p2 == alpha
  contrib = 0.d0
  do mm = 1, Ne(1) !! alpha ==> pure parallele spin contribution
    m = occ(mm,1)
-   contrib += three_e_double_parrallel_spin(m,p2,h2,p1,h1)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
  enddo
 
  do mm = 1, Ne(2) !! beta
@@ -371,13 +370,12 @@ subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
  double precision, intent(out) :: contrib
  integer :: mm,m 
  double precision :: direct_int, exchange_int
- double precision :: three_e_double_parrallel_spin
  !! h1,p1 == beta
  !! h2,p2 == beta
  contrib = 0.d0
  do mm = 1, Ne(2) !! beta ==> pure parallele spin contribution
    m = occ(mm,1)
-   contrib += three_e_double_parrallel_spin(m,p2,h2,p1,h1)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
  enddo
 
  do mm = 1, Ne(1) !! alpha
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
index cb9306aa..ae41591a 100644
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -196,7 +196,7 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
   integer                        :: k,l,i,jj,j
-  double precision :: three_e_single_parrallel_spin, direct_int, exchange_int
+  double precision :: direct_int, exchange_int
   
 
   if (iorb < 1) then
@@ -236,7 +236,7 @@ subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,
    !! jj = ispin = ispin_fock >> pure parallel spin
    do j = 1, na
     jj = occ(j,ispin)
-    hthree += three_e_single_parrallel_spin(jj,iorb,p_fock,h_fock)
+    hthree += three_e_single_parrallel_spin_prov(jj,iorb,p_fock,h_fock)
    enddo
    !! spin of jj == other spin than ispin AND ispin_fock
    !! exchange between the iorb and (h_fock, p_fock)
@@ -287,7 +287,7 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N
   integer(bit_kind), intent(inout) :: key(Nint,2)
   double precision, intent(inout) :: hthree
   
-  double precision  :: direct_int, exchange_int, three_e_single_parrallel_spin
+  double precision  :: direct_int, exchange_int
   integer                        :: occ(Nint*bit_kind_size,2)
   integer                        :: other_spin
   integer                        :: k,l,i,jj,mm,j,m
@@ -315,7 +315,7 @@ subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,N
    !! jj = ispin = ispin_fock >> pure parallel spin
    do j = 1, na
     jj = occ(j,ispin)
-    hthree -= three_e_single_parrallel_spin(jj,iorb,p_fock,h_fock)
+    hthree -= three_e_single_parrallel_spin_prov(jj,iorb,p_fock,h_fock)
    enddo
    !! spin of jj == other spin than ispin AND ispin_fock
    !! exchange between the iorb and (h_fock, p_fock)
diff --git a/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f b/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
new file mode 100644
index 00000000..e8277a74
--- /dev/null
+++ b/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
@@ -0,0 +1,140 @@
+
+BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Table of the function three_e_diag_parrallel_spin: matrix elements of the -L three-body
+  ! operator ON A BI ORTHONORMAL BASIS
+  !
+  ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form <mji|-L|mji> for same spin matrix elements
+  !
+  ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: three_e_diag_parrallel_spin
+  double precision :: warmup, t_start, t_stop
+
+  print *, ' Providing the three_e_diag_parrallel_spin_prov ...'
+  three_e_diag_parrallel_spin_prov = 0.d0
+
+  ! one serial call before the parallel region (to provide all stuffs), not included in the timing
+  warmup = three_e_diag_parrallel_spin(1,1,1)
+  call wall_time(t_start)
+  ! only the m >= j part of each (m,j) slab is evaluated with the function ...
+ !$OMP PARALLEL DO                                       &
+ !$OMP DEFAULT (NONE)                                    &
+ !$OMP PRIVATE (i,j,m,warmup)                            &
+ !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov)  &
+ !$OMP SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        three_e_diag_parrallel_spin_prov(m,j,i) = three_e_diag_parrallel_spin(m,j,i)
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+  ! ... and the m <= j part is obtained by mirroring (m,j,i) <- (j,m,i)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      three_e_diag_parrallel_spin_prov(1:j,j,i) = three_e_diag_parrallel_spin_prov(j,1:j,i)
+    enddo
+  enddo
+
+  call wall_time(t_stop)
+  print *, ' wall time for three_e_diag_parrallel_spin_prov', t_stop - t_start
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Table of the function three_e_single_parrallel_spin: matrix elements of the -L three-body
+  ! operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of <mjk|-L|mji> for same spin matrix elements
+  !
+  ! notice the -1 sign: in this way three_e_single_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: three_e_single_parrallel_spin
+  double precision :: warmup, t_start, t_stop
+
+  print *, ' Providing the three_e_single_parrallel_spin_prov ...'
+  three_e_single_parrallel_spin_prov = 0.d0
+
+  ! one serial call before the parallel region, not included in the timing
+  warmup = three_e_single_parrallel_spin(1,1,1,1)
+  call wall_time(t_start)
+ !$OMP PARALLEL DO                                         &
+ !$OMP DEFAULT (NONE)                                      &
+ !$OMP PRIVATE (i,j,k,m,warmup)                            &
+ !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov)  &
+ !$OMP SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          three_e_single_parrallel_spin_prov(m,j,k,i) = three_e_single_parrallel_spin(m,j,k,i)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+  call wall_time(t_stop)
+  print *, ' wall time for three_e_single_parrallel_spin_prov', t_stop - t_start
+
+END_PROVIDER 
+
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Table of the function three_e_double_parrallel_spin:
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_double_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0, three_e_double_parrallel_spin
+
+  three_e_double_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_double_parrallel_spin_prov ...'
+
+ integral = three_e_double_parrallel_spin(1,1,1,1,1) ! one serial call before the parallel region
+  call wall_time(wall0) ! started after the serial call, so that only the tabulation loop is timed
+ !$OMP PARALLEL                     &
+ !$OMP DEFAULT (NONE)               &
+ !$OMP PRIVATE (i,j,k,m,l,integral) & 
+ !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov)
+ !$OMP DO SCHEDULE (dynamic)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            three_e_double_parrallel_spin_prov(m,l,j,k,i) = three_e_double_parrallel_spin(m,l,j,k,i)
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 551eced2..7d063c61 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -57,7 +57,7 @@ subroutine test_slater_tc_opt
 !     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
      stop
     endif
-    print*,htot,hnewtot,dabs(htot-hnewtot) 
+!    print*,htot,hnewtot,dabs(htot-hnewtot) 
    endif
   enddo
  enddo

From d0fecfa84577d3f9eee07615bb4399ad33eebe69 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 20 Jan 2023 18:14:29 +0100
Subject: [PATCH 51/68] parallelized the two electron terms for opt doubles tc

---
 src/tc_bi_ortho/slater_tc_opt_double.irp.f | 24 ++++++++++++
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f     | 43 +++++++++++++++++++++-
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index c16c673d..bd2d37a3 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -209,7 +209,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num,
  double precision :: contrib
  allocate( occ(N_int*bit_kind_size,2) )
  call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_abab(1,1,1,1,occ,Ne,contrib)
  eff_2_e_from_3_e_ab = 0.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab)
+ !$OMP DO SCHEDULE (static) 
   do hh1 = 1, n_act_orb !! alpha 
     h1 = list_act(hh1) 
     do hh2 = 1, n_act_orb !! beta 
@@ -224,6 +230,8 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
 END_PROVIDER 
 
@@ -276,7 +284,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num,
  double precision :: contrib
  allocate( occ(N_int*bit_kind_size,2) )
  call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_aaaa(1 ,1 ,1 ,1 ,occ,Ne,contrib)
  eff_2_e_from_3_e_aa = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa)
+ !$OMP DO SCHEDULE (static) 
   do hh1 = 1, n_act_orb !! alpha 
     h1 = list_act(hh1) 
     do hh2 = hh1+1, n_act_orb !! alpha
@@ -291,6 +305,8 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
 END_PROVIDER 
 
@@ -341,7 +357,13 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num,
  double precision :: contrib
  allocate( occ(N_int*bit_kind_size,2) )
  call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+ call give_contrib_for_bbbb(1,1 ,1 ,1 ,occ,Ne,contrib)
  eff_2_e_from_3_e_bb = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb)
+ !$OMP DO SCHEDULE (static) 
   do hh1 = 1, n_act_orb !! beta 
     h1 = list_act(hh1) 
     do hh2 = hh1+1, n_act_orb !! beta
@@ -356,6 +378,8 @@ BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num,
       enddo
     enddo
   enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
 
 END_PROVIDER 
 
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 7d063c61..66ca2e6a 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -11,7 +11,8 @@ program tc_bi_ortho
   touch read_wf
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-  call test_slater_tc_opt
+!  call test_slater_tc_opt
+ call timing_hij
 end
 
 subroutine test_slater_tc_opt
@@ -65,3 +66,43 @@ subroutine test_slater_tc_opt
  print*,'accu   = ',accu/i_count
 
 end
+! timing_hij: prints the wall time of the N_det x N_det calls to htilde_mu_mat_bi_ortho ("old") and to
+! htilde_mu_mat_opt_bi_ortho ("new") over psi_det; one untimed call to the new routine is made first.
+subroutine timing_hij
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:) ! never allocated: the storage is commented out below
+ double precision :: hmono, htwoe, hthree, htot
+! allocate(mat_old(N_det,N_det))
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ call wall_time(wall0)
+ do i = 1, N_det
+  do j = 1, N_det
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+!   mat_old(j,i) = htot
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'time for old hij = ',wall1 - wall0
+
+! allocate(mat_new(N_det,N_det))
+ call wall_time(wall0)
+ do i = 1, N_det
+  do j = 1, N_det
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+!   mat_new(j,i) = htot
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'time for new hij = ',wall1 - wall0
+ double precision :: accu
+ accu = 0.d0
+ do i = 1, N_det ! empty loop nest as long as the comparison below stays commented out
+  do j = 1, N_det
+!   accu += dabs(mat_new(j,i) - mat_old(j,i))
+  enddo
+ enddo
+ print*,'accu = ',accu ! always 0.d0 here: nothing is accumulated above
+
+end

From d07bbacd8c160ec1b3d43d0dc6529a26e0e886ed Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sat, 21 Jan 2023 12:21:31 +0100
Subject: [PATCH 52/68] minor modifs in test_tc_bi_ortho.irp.f

---
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f | 35 +++-----------------------
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 66ca2e6a..1b247da5 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -11,7 +11,7 @@ program tc_bi_ortho
   touch read_wf
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-!  call test_slater_tc_opt
+ call test_slater_tc_opt
  call timing_hij
 end
 
@@ -25,44 +25,15 @@ subroutine test_slater_tc_opt
  accu_d = 0.d0
  i_count = 0.d0
  do i = 1, N_det
-! do i = 1,1
-  call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
-  call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-  call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthree, hnewtot)
-!  print*,hthree,hnewthree
-!  print*,htot,hnewtot,dabs(hnewtot-htot)
-  accu_d += dabs(htot-hnewtot) 
-  if(dabs(htot-hnewtot).gt.1.d-8)then
-   print*,i
-   print*,htot,hnewtot,dabs(htot-hnewtot)
-  endif
-!  do j = 319,319
   do j = 1,N_det
-   if(i==j)cycle
-   integer :: degree 
-   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
-!   if(degree .ne. 1)cycle
    call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
    call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
-!   if(dabs(hthree).gt.1.d-15)then
    if(dabs(htot).gt.1.d-15)then
-!    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
-     i_count += 1.D0
-     accu += dabs(htot-hnewtot) 
-!    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
-    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
-     print*,j,i,degree
-     call debug_det(psi_det(1,1,i),N_int)
-     call debug_det(psi_det(1,1,j),N_int)
-     print*,htot,hnewtot,dabs(htot-hnewtot) 
-!     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
-     stop
-    endif
-!    print*,htot,hnewtot,dabs(htot-hnewtot) 
+    i_count += 1.D0
+    accu += dabs(htot-hnewtot) 
    endif
   enddo
  enddo
- print*,'accu_d = ',accu_d/N_det
  print*,'accu   = ',accu/i_count
 
 end

From 20da577c4f7b1302512e2c5739ce415bbe2cc082 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Sun, 22 Jan 2023 17:00:55 +0100
Subject: [PATCH 53/68] added new timing test

---
 src/tc_bi_ortho/slater_tc_opt_double.irp.f |  22 ++-
 src/tc_bi_ortho/test_tc_bi_ortho.irp.f     | 204 ++++++++++++++++-----
 2 files changed, 173 insertions(+), 53 deletions(-)

diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index bd2d37a3..9d33523b 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -32,19 +32,23 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe,
   if(degree.ne.2)then
    return
   endif
-
-  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  integer :: degree_i,degree_j
+  call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int)
+  call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int)
   call get_double_excitation(key_i, key_j, exc, phase, Nint)
   call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
 
   if(s1.ne.s2)then
    ! opposite spin two-body 
-!   key_j, key_i
     htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
     if(three_body_h_tc)then
      if(.not.double_normal_ord)then
-      call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
-     elseif(double_normal_ord.and.+Ne(1).gt.2)then
+      if(degree_i>degree_j)then
+       call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
+      else
+       call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+      endif
+     elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then
       htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)!!! WTF ???
      endif
     endif
@@ -56,8 +60,12 @@ subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe,
    htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
    if(three_body_h_tc)then
     if(.not.double_normal_ord)then
-     call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
-    elseif(double_normal_ord.and.+Ne(1).gt.2)then
+     if(degree_i>degree_j)then
+      call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
+     else
+      call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+     endif
+    elseif(double_normal_ord.and.elec_num+elec_num.gt.2)then
       htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)!!! WTF ???
       htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)!!! WTF ???
     endif
diff --git a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
index 66ca2e6a..99352162 100644
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@@ -11,13 +11,16 @@ program tc_bi_ortho
   touch read_wf
   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
 
-!  call test_slater_tc_opt
- call timing_hij
+! call test_slater_tc_opt
+ call timing_tot
+! call timing_diag
+! call timing_single
+! call timing_double
 end
 
 subroutine test_slater_tc_opt
  implicit none
- integer :: i,j
+ integer :: i,j,degree
  double precision :: hmono, htwoe, htot, hthree 
  double precision :: hnewmono, hnewtwoe, hnewthree, hnewtot
  double precision :: accu_d ,i_count, accu
@@ -25,84 +28,193 @@ subroutine test_slater_tc_opt
  accu_d = 0.d0
  i_count = 0.d0
  do i = 1, N_det
-! do i = 1,1
-  call diag_htilde_mu_mat_bi_ortho(N_int, psi_det(1,1,i), hmono, htwoe, htot)
-  call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-  call diag_htilde_mu_mat_fock_bi_ortho(N_int, psi_det(1,1,i), hnewmono, hnewtwoe, hnewthree, hnewtot)
-!  print*,hthree,hnewthree
-!  print*,htot,hnewtot,dabs(hnewtot-htot)
-  accu_d += dabs(htot-hnewtot) 
-  if(dabs(htot-hnewtot).gt.1.d-8)then
-   print*,i
-   print*,htot,hnewtot,dabs(htot-hnewtot)
-  endif
-!  do j = 319,319
   do j = 1,N_det
-   if(i==j)cycle
-   integer :: degree 
-   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
-!   if(degree .ne. 1)cycle
    call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
    call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
-!   if(dabs(hthree).gt.1.d-15)then
    if(dabs(htot).gt.1.d-15)then
-!    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
      i_count += 1.D0
      accu += dabs(htot-hnewtot) 
-!    if(dabs(hthree-hnewthree).gt.1.d-8.or.dabs(hthree-hnewthree).gt.dabs(hthree))then
-    if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
-     print*,j,i,degree
-     call debug_det(psi_det(1,1,i),N_int)
-     call debug_det(psi_det(1,1,j),N_int)
-     print*,htot,hnewtot,dabs(htot-hnewtot) 
-!     print*,hthree,hnewthree,dabs(hthree-hnewthree) 
-     stop
-    endif
-!    print*,htot,hnewtot,dabs(htot-hnewtot) 
+     if(dabs(htot-hnewtot).gt.1.d-8.or.dabs(htot-hnewtot).gt.dabs(htot))then
+      call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+      print*,j,i,degree
+      call debug_det(psi_det(1,1,i),N_int)
+      call debug_det(psi_det(1,1,j),N_int)
+      print*,htot,hnewtot,dabs(htot-hnewtot)
+      print*,hthree,hnewthree,dabs(hthree-hnewthree)
+      stop
+     endif
    endif
   enddo
  enddo
- print*,'accu_d = ',accu_d/N_det
  print*,'accu   = ',accu/i_count
 
 end
 
-
-subroutine timing_hij
+subroutine timing_tot
  implicit none
  integer :: i,j
  double precision :: wall0, wall1
  double precision, allocatable :: mat_old(:,:),mat_new(:,:)
- double precision :: hmono, htwoe, hthree, htot
-! allocate(mat_old(N_det,N_det))
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+!   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   i_count += 1.d0
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for old hij for total   = ',wall1 - wall0
+
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+!   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   i_count += 1.d0
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij for total   = ',wall1 - wall0
+ call i_H_j(psi_det(1,1,1), psi_det(1,1,2),N_int,htot)
+ call wall_time(wall0)
+ i_count = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call i_H_j(psi_det(1,1,j), psi_det(1,1,i),N_int,htot)
+   i_count += 1.d0
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij STANDARD    = ',wall1 - wall0
+
+end
+
+subroutine timing_diag
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
  call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
  call wall_time(wall0)
+ i_count = 0.d0
  do i = 1, N_det
-  do j = 1, N_det
+  do j = i,i 
+   i_count += 1.d0
    call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-!   mat_old(j,i) = htot
   enddo
  enddo
  call wall_time(wall1)
- print*,'time for old hij = ',wall1 - wall0
+ print*,'i_count = ',i_count
+ print*,'time for old hij for diagonal= ',wall1 - wall0
 
-! allocate(mat_new(N_det,N_det))
  call wall_time(wall0)
+ i_count = 0.d0
  do i = 1, N_det
-  do j = 1, N_det
+  do j = i,i
+   i_count += 1.d0
    call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
-!   mat_new(j,i) = htot
   enddo
  enddo
  call wall_time(wall1)
- print*,'time for new hij = ',wall1 - wall0
- double precision :: accu
+ print*,'i_count = ',i_count
+ print*,'time for new hij for diagonal= ',wall1 - wall0
+
+end
+
+subroutine timing_single
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1,accu
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ i_count = 0.d0
  accu = 0.d0
  do i = 1, N_det
   do j = 1, N_det
-!   accu += dabs(mat_new(j,i) - mat_old(j,i))
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.1)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
   enddo
  enddo
- print*,'accu = ',accu
+ print*,'i_count = ',i_count
+ print*,'time for old hij for singles = ',accu
+
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.1)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ print*,'i_count = ',i_count
+ print*,'time for new hij for singles = ',accu
 
 end
+
+subroutine timing_double
+ implicit none
+ integer :: i,j
+ double precision :: wall0, wall1,accu
+ double precision, allocatable :: mat_old(:,:),mat_new(:,:)
+ double precision :: hmono, htwoe, hthree, htot, i_count
+ integer :: degree 
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.2)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ print*,'i_count = ',i_count
+ print*,'time for old hij for doubles = ',accu
+
+ i_count = 0.d0
+ accu = 0.d0
+ do i = 1, N_det
+  do j = 1, N_det
+   call get_excitation_degree(psi_det(1,1,j), psi_det(1,1,i),degree,N_int)
+   if(degree.ne.2)cycle
+   i_count += 1.d0
+   call wall_time(wall0)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(wall1)
+   accu += wall1 - wall0
+  enddo
+ enddo
+ call wall_time(wall1)
+ print*,'i_count = ',i_count
+ print*,'time for new hij for doubles = ',accu
+
+end
+

From e1e9ae0941bd366c2f3f717d67a6e11cbe45502e Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Mon, 23 Jan 2023 19:04:33 +0100
Subject: [PATCH 54/68] fix warning

---
 src/ao_one_e_ints/point_charges.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f
index c038458d..82388c0d 100644
--- a/src/ao_one_e_ints/point_charges.irp.f
+++ b/src/ao_one_e_ints/point_charges.irp.f
@@ -156,7 +156,7 @@ BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
   endif
  print*,'Coordinates for the point charges '
  do i = 1, n_pts_charge
-  write(*,'(I3,X,3(F16.8,X))'),i,pts_charge_coord(i,1:3)
+  write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3)
  enddo
 
 END_PROVIDER

From af60a02919b9923772398abf877e79dd0eed160d Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Tue, 24 Jan 2023 13:49:04 +0100
Subject: [PATCH 55/68] it works with same left and right coefficients for 2
 dets in WF

---
 src/cipsi_tc_bi_ortho/get_d.irp.f             | 612 +++++++++++-------
 .../pt2_stoch_routines.irp.f                  |   2 +-
 .../run_selection_slave.irp.f                 |   2 +-
 src/cipsi_tc_bi_ortho/selection.irp.f         |  97 ++-
 src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f  |   1 +
 src/determinants/fock_diag.irp.f              |  34 +-
 src/fci_tc_bi/selectors.irp.f                 |  10 +-
 src/tc_bi_ortho/slater_tc_opt.irp.f           |  42 ++
 src/tc_bi_ortho/slater_tc_opt_diag.irp.f      | 194 ++++++
 src/tc_bi_ortho/slater_tc_opt_double.irp.f    |  55 ++
 src/tc_bi_ortho/slater_tc_opt_single.irp.f    | 112 ++++
 11 files changed, 890 insertions(+), 271 deletions(-)

diff --git a/src/cipsi_tc_bi_ortho/get_d.irp.f b/src/cipsi_tc_bi_ortho/get_d.irp.f
index c642f420..58b1972a 100644
--- a/src/cipsi_tc_bi_ortho/get_d.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d.irp.f
@@ -194,7 +194,7 @@ end subroutine get_d3_h
 
 ! ---
 
-subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+subroutine get_d2(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
 
   use bitmasks
   implicit none
@@ -202,15 +202,15 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
   integer(bit_kind), intent(in) :: phasemask(N_int,2)
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
-  double precision, intent(in) :: coefs(N_states)
-  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
   integer, intent(in) :: h(0:2,2), p(0:4,2), sp
 
-  double precision, external :: get_phase_bi, mo_two_e_integral
+  double precision, external :: get_phase_bi
 
   integer :: i, j, k, tip, ma, mi, puti, putj
   integer :: h1, h2, p1, p2, i1, i2
-  double precision :: hij, phase
+  double precision :: hij, hji, phase
 
   integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
   integer, parameter :: turn2(2) = (/2, 1/)
@@ -222,11 +222,13 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   tip = p(0,1) * p(0,2)
 
   ma = sp
+  print*,'in get d2'
   if(p(0,1) > p(0,2)) ma = 1
   if(p(0,1) < p(0,2)) ma = 2
   mi = mod(ma, 2) + 1
 
   if(sp == 3) then
+    print*,'in sp == 3'
     if(ma == 2) bant = 2
 
     if(tip == 3) then
@@ -247,20 +249,24 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         ! 
         !                      <p1 p2 | h1 h2>        -            <p2 p1 | h1 h2 >
         ! < p2 p1 | H^tilde^dag| h1 h2 > = < h1 h2 | w_ee^h + t^nh | p1 p2 >
-        hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2)
+        hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2, p1, h1, h2)
+        hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2, h1, p1, p2)
         if (hij == 0.d0) cycle
 
         hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
 
         if(ma == 1) then
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k,1) * hij
+            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k,2) * hji
           enddo
         else
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
           enddo
         end if
       end do
@@ -277,12 +283,15 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
           p1 = p(turn2(i), 1)
 
-          hij = mo_two_e_integral(p1, p2, h1, h2)
+          hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2)
           if (hij /= 0.d0) then
             hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
             !DIR$ LOOP COUNT AVG(4)
             do k=1,N_states
-              mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+              mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+              mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
             enddo
           endif
         end do
@@ -290,6 +299,7 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
     end if
 
   else
+    print*,'NOT in sp == 3'
     if(tip == 0) then
       h1 = h(1, ma)
       h2 = h(2, ma)
@@ -305,13 +315,16 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           i2 = turn2d(2, i, j)
           p1 = p(i1, ma)
           p2 = p(i2, ma)
-          hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)
+          hji = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2,h1, p1, p2)
           if (hij == 0.d0) cycle
 
           hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) +coefs(k,1) * hij
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) +coefs(k,2) * hji
           enddo
         end do
       end do
@@ -327,19 +340,23 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         if(banned(puti,putj,1)) cycle
         p2 = p(i, ma)
 
-        hij = mo_two_e_integral(p1, p2, h1, h2)
+        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+        hji = mo_bi_ortho_tc_two_e(h1, h2, p1, p2)
         if (hij == 0.d0) cycle
 
         hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        hji = hji * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
         if (puti < putj) then
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
           enddo
         else
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k,1) * hij
+            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k,2) * hji
           enddo
         endif
       end do
@@ -351,12 +368,15 @@ subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         p2 = p(2, mi)
         h1 = h(1, mi)
         h2 = h(2, mi)
-        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2))
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2))
+        hji = (mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h2,h1, p1, p2))
         if (hij /= 0.d0) then
           hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
           enddo
         end if
       end if
@@ -367,7 +387,7 @@ end subroutine get_d2
 
 ! ---
 
-subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+subroutine get_d1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
 
   use bitmasks
   implicit none
@@ -376,28 +396,34 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   integer(bit_kind), intent(in)  :: phasemask(N_int,2)
   logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
   integer(bit_kind)              :: det(N_int, 2)
-  double precision, intent(in)   :: coefs(N_states)
-  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  double precision, intent(in)   :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
   integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
-  double precision, external     :: get_phase_bi, mo_two_e_integral
+  double precision, external     :: get_phase_bi
   logical                        :: ok
 
   logical, allocatable           :: lbanned(:,:)
   integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
   integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+  integer :: mm
 
   integer, parameter             :: turn2(2) = (/2,1/)
   integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
 
   integer                        :: bant
-  double precision, allocatable :: hij_cache(:,:)
-  double precision               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  double precision, allocatable  :: hij_cache(:,:)
+  double precision               :: hij, tmp_row_ij(N_states, mo_num), tmp_row_ij2(N_states, mo_num)
+  double precision, allocatable  :: hji_cache(:,:)
+  double precision               :: hji, tmp_row_ji(N_states, mo_num), tmp_row_ji2(N_states, mo_num)
 
   PROVIDE mo_integrals_map N_int
 
   allocate (lbanned(mo_num, 2))
   allocate (hij_cache(mo_num,2))
+  allocate (hji_cache(mo_num,2))
   lbanned = bannedOrb
+  print*,'in get d1'
+  call debug_det(gen, N_int)
 
   do i=1, p(0,1)
     lbanned(p(i,1), 1) = .true.
@@ -413,16 +439,26 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   bant = 1
 
   if(sp == 3) then
+    print*,'in sp == 3'
     !move MA
     if(ma == 2) bant = 2
     puti = p(1,mi)
     hfix = h(1,ma)
     p1 = p(1,ma)
     p2 = p(2,ma)
+    print*,puti, hfix,p1,p2
     if(.not. bannedOrb(puti, mi)) then
-      call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
-      call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
-      tmp_row = 0d0
+!      print*,'not banned'
+      do mm = 1, mo_num
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+      enddo
+!      call get_mo_bi_ortho_tc_two_es(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+      tmp_row_ij = 0d0
+      tmp_row_ji = 0d0
       do putj=1, hfix-1
         if(lbanned(putj, ma)) cycle
         if(banned(putj, puti,bant)) cycle
@@ -431,7 +467,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+            tmp_row_ij(k,putj) = tmp_row_ij(k,putj) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(putj,1) - hji_cache(putj,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,putj) = tmp_row_ji(k,putj) + hji * coefs(k,1)
           enddo
         endif
       end do
@@ -443,18 +487,28 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+            tmp_row_ij(k,putj) = tmp_row_ij(k,putj) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(putj,2) - hji_cache(putj,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,putj) = tmp_row_ji(k,putj) + hji * coefs(k,1)
           enddo
         endif
       end do
 
       if(ma == 1) then
-        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_ij(1:N_states,1:mo_num)
+        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_ji(1:N_states,1:mo_num)
       else
         do l=1,mo_num
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_ij(k,l)
+            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_ji(k,l)
           enddo
         enddo
       end if
@@ -462,10 +516,18 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
 
     !MOVE MI
     pfix = p(1,mi)
-    tmp_row = 0d0
-    tmp_row2 = 0d0
-    call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
-    call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
+    tmp_row_ij = 0d0
+    tmp_row_ij2 = 0d0
+    tmp_row_ji = 0d0
+    tmp_row_ji2 = 0d0
+!    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
+!    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
+    do mm = 1, mo_num
+     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
+     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
+     hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1)
+     hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2)
+    enddo
     putj = p1
     do puti = 1, mo_num !HOT
 
@@ -478,7 +540,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            tmp_row_ij(k,puti) = tmp_row_ij(k,puti) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(puti,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row_ji(k,puti) = tmp_row_ji(k,puti) + hji * coefs(k,1)
           enddo
         endif
       endif
@@ -489,7 +559,14 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         if (hij /= 0.d0) then
           hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
           do k=1,N_states
-            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            tmp_row_ij2(k,puti) = tmp_row_ij2(k,puti) + hij * coefs(k,2)
+          enddo
+        endif
+        hji = hji_cache(puti,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row_ji2(k,puti) = tmp_row_ji2(k,puti) + hji * coefs(k,1)
           enddo
         endif
       endif
@@ -497,19 +574,24 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
     enddo
 
     if(mi == 1) then
-      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
-      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_ij(:,:)
+      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row_ij2(:,:)
+      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_ji(:,:)
+      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row_ji2(:,:)
     else
       do l=1,mo_num
         !DIR$ LOOP COUNT AVG(4)
         do k=1,N_states
-          mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
-          mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij(k,l)
+          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij2(k,l)
+          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ji(k,l)
+          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ji2(k,l)
         enddo
       enddo
     end if
 
   else  ! sp /= 3
+    print*,'not in sp == 3'
 
     if(p(0,ma) == 3) then
       do i=1,3
@@ -517,16 +599,28 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         puti = p(i, ma)
         p1 = p(turn3(1,i), ma)
         p2 = p(turn3(2,i), ma)
-        call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
-        call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
-        tmp_row = 0d0
+!        call get_mo_bi_ortho_tc_two_es(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+!        call get_mo_bi_ortho_tc_two_es(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+        do mm = 1, mo_num
+         hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+         hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+         hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+         hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+        enddo
+        tmp_row_ij = 0d0
+        tmp_row_ji = 0d0
         do putj=1,hfix-1
           if(banned(putj,puti,1)) cycle
           if(lbanned(putj,ma)) cycle
           hij = hij_cache(putj,1) - hij_cache(putj,2)
           if (hij /= 0.d0) then
             hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
-            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+            tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:,1)
+          endif
+          hji = hji_cache(putj,1) - hji_cache(putj,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row_ji(:,putj) = tmp_row_ji(:,putj) + hji * coefs(:,2)
           endif
         end do
         do putj=hfix+1,mo_num
@@ -535,15 +629,22 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           hij = hij_cache(putj,2) - hij_cache(putj,1)
           if (hij /= 0.d0) then
             hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
-            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+            tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:,1)
+          endif
+          hji = hji_cache(putj,2) - hji_cache(putj,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row_ji(:,putj) = tmp_row_ji(:,putj) + hji * coefs(:,2)
           endif
         end do
 
-        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_ij(:,:puti-1)
+        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_ji(:,:puti-1)
         do l=puti,mo_num
           !DIR$ LOOP COUNT AVG(4)
           do k=1,N_states
-            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_ij(k,l)
+            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_ji(k,l)
           enddo
         enddo
       end do
@@ -552,10 +653,18 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
       pfix = p(1,mi)
       p1 = p(1,ma)
       p2 = p(2,ma)
-      tmp_row = 0d0
-      tmp_row2 = 0d0
-      call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map)
-      call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map)
+      tmp_row_ij = 0d0
+      tmp_row_ij2 = 0d0
+      tmp_row_ji = 0d0
+      tmp_row_ji2 = 0d0
+!      call get_mo_bi_ortho_tc_two_es(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map)
+      do mm = 1, mo_num
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,pfix,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,pfix,mm,hfix)
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix)
+      enddo
       putj = p2
       do puti=1,mo_num
         if(lbanned(puti,ma)) cycle
@@ -566,7 +675,15 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
             hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
             !DIR$ LOOP COUNT AVG(4)
             do k=1,N_states
-              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+              tmp_row_ij(k,puti) = tmp_row_ij(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+          hji = hji_cache(puti,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row_ji(k,puti) = tmp_row_ji(k,puti) + hji * coefs(k,2)
             enddo
           endif
         end if
@@ -577,23 +694,34 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
           if (hij /= 0.d0) then
             hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
             do k=1,N_states
-              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+              tmp_row_ij2(k,puti) = tmp_row_ij2(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+          hji = hji_cache(puti,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row_ji2(k,puti) = tmp_row_ji2(k,puti) + hji * coefs(k,2)
             enddo
           endif
         end if
       end do
-      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_ij(:,:p2-1)
+      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_ji(:,:p2-1)
       do l=p2,mo_num
         !DIR$ LOOP COUNT AVG(4)
         do k=1,N_states
-          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij(k,l)
+          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ji(k,l)
         enddo
       enddo
-      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row_ij2(:,:p1-1)
+      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row_ji2(:,:p1-1)
       do l=p1,mo_num
         !DIR$ LOOP COUNT AVG(4)
         do k=1,N_states
-          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij2(k,l)
+          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ji2(k,l)
         enddo
       enddo
     end if
@@ -617,10 +745,17 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
         p2 = p(i2,s2)
         if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
         call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
-        call i_h_j(gen, det, N_int, hij)
+!        call i_h_j(gen, det, N_int, hij)
+        !!!! GUESS ON THE ORDER OF DETS 
+        print*,'compute hij'
+!        hij = 0.d0
+!        hji = 0.d0
+        call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
         !DIR$ LOOP COUNT AVG(4)
         do k = 1, N_states
-          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
+          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k,1) * hij
+          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k,2) * hji
         enddo
       enddo
     enddo
@@ -629,7 +764,7 @@ end subroutine get_d1
 
 ! ---
 
-subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+subroutine get_d0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
 
   use bitmasks
   implicit none
@@ -638,72 +773,103 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
   integer(bit_kind), intent(in) :: phasemask(N_int,2)
   logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
   integer(bit_kind) :: det(N_int, 2)
-  double precision, intent(in) :: coefs(N_states)
-  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_m(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num)
   integer, intent(in) :: h(0:2,2), p(0:4,2), sp
 
-  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj
-  double precision :: hij, phase
-  double precision, external :: get_phase_bi, mo_two_e_integral
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj, mm
+  double precision :: hij, phase, hji
+  double precision, external :: get_phase_bi
   logical :: ok
 
   integer, parameter :: bant=1
   double precision, allocatable :: hij_cache1(:), hij_cache2(:)
   allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+  double precision, allocatable :: hji_cache1(:), hji_cache2(:)
+  allocate (hji_cache1(mo_num),hji_cache2(mo_num))
 
+  print*,'in get d0'
+!  call debug_det(gen, N_int)
 
   if(sp == 3) then ! AB
     h1 = p(1,1)
     h2 = p(1,2)
+!   print*,'in AB'
     do p1=1, mo_num
       if(bannedOrb(p1, 1)) cycle
-      call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
+      do mm =1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(h2,h1,mm,p1)
+      enddo
       do p2=1, mo_num
         if(bannedOrb(p2,2)) cycle
         if(banned(p1, p2, bant)) cycle ! rentable?
         if(p1 == h1 .or. p2 == h2) then
+!          print*,'in p1 == h1 or p2 == h2'
           call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
-          call i_h_j(gen, det, N_int, hij)
+!          call i_h_j(gen, det, N_int, hij)
+           !!! GUESS ON THE ORDER 
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
         else
+!          print*,'ELSE '
           phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
           hij = hij_cache1(p2) * phase
+          hji = hji_cache1(p2) * phase
         end if
         if (hij == 0.d0) cycle
         !DIR$ LOOP COUNT AVG(4)
         do k=1,N_states
-          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij  ! HOTSPOT
+          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k,1) * hij  ! HOTSPOT
+          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k,2) * hji  ! HOTSPOT
         enddo
       end do
     end do
 
   else ! AA BB
+!    print*, 'in AA BB' 
     p1 = p(1,sp)
     p2 = p(2,sp)
     do puti=1, mo_num
       if(bannedOrb(puti, sp)) cycle
-      call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map)
-      call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map)
+      do mm = 1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(p2,p1,mm,puti)
+       hij_cache2(mm) = mo_bi_ortho_tc_two_e(p1,p2,mm,puti)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(mm,puti,p2,p1)
+       hji_cache2(mm) = mo_bi_ortho_tc_two_e(mm,puti,p1,p2)
+      enddo
+!      call get_mo_bi_ortho_tc_two_es(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map)
+!      call get_mo_bi_ortho_tc_two_es(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map)
       do putj=puti+1, mo_num
         if(bannedOrb(putj, sp)) cycle
         if(banned(puti, putj, bant)) cycle ! rentable?
         if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
           call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
-          call i_h_j(gen, det, N_int, hij)
-          if (hij == 0.d0) cycle
+!          call i_h_j(gen, det, N_int, hij)
+          !!! GUESS 
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hij)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hji)
+          if (hij == 0.d0.or.hji == 0.d0) cycle
         else
-          hij = (mo_two_e_integral(p1, p2, puti, putj) -  mo_two_e_integral(p2, p1, puti, putj))
-          if (hij == 0.d0) cycle
+          hji = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e(p2, p1, puti, putj))
+          hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) -  mo_bi_ortho_tc_two_e(puti, putj, p2, p1))
+          if (hij == 0.d0.or.hji==0.d0) cycle
           hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+          hji = hji * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
         end if
         !DIR$ LOOP COUNT AVG(4)
         do k=1,N_states
-          mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k,1) * hij
+          mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k,2) * hji
         enddo
       end do
     end do
   end if
 
-  deallocate(hij_cache1,hij_cache2)
+!  deallocate(hij_cache1,hij_cache2)
+!  deallocate(hji_cache1,hji_cache2)
 
 end subroutine get_d0
 
@@ -734,7 +900,7 @@ end subroutine get_d0
 !  double precision                :: hij_p, hij_m, phase
 !
 !  double precision, external      :: get_phase_bi
-!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int 
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int 
 !
 !  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
 !
@@ -763,10 +929,10 @@ end subroutine get_d0
 !        p1 = p(i1, ma)
 !        p2 = p(i2, ma)
 !
-!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
-!              - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
-!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
-!              - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
 !
 !        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
 !
@@ -802,8 +968,8 @@ end subroutine get_d0
 !          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
 !          p1 = p(turn2(i), 1)
 !
-!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
-!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
 !
 !          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
 !            hij_p = hij_p * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
@@ -835,10 +1001,10 @@ end subroutine get_d0
 !          p1 = p(i1, ma)
 !          p2 = p(i2, ma)
 !
-!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
-!                - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
-!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
-!                - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!                - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!                - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
 !
 !          if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
 !
@@ -865,8 +1031,8 @@ end subroutine get_d0
 !        if(banned(puti,putj,1)) cycle
 !        p2 = p(i, ma)
 !
-!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
-!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
 !
 !        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
 !
@@ -895,10 +1061,10 @@ end subroutine get_d0
 !        h1 = h(1, mi)
 !        h2 = h(2, mi)
 !
-!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
-!              - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
-!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
-!              - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!        hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
 !
 !        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
 !          hij_p = hij_p * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
@@ -937,15 +1103,15 @@ end subroutine get_d0
 !  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
 !
 !  double precision, external      :: get_phase_bi
-!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int
 !
 !  logical                         :: ok
 !  logical, allocatable            :: lbanned(:,:)
 !  integer                         :: bant
 !  integer                         :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
 !  integer                         :: hfix, pfix, h1, h2, p1, p2, ib, k, l
-!  double precision                :: tmp_row_p (N_states, mo_num), tmp_row_m (N_states, mo_num)
-!  double precision                :: hij_p, hij_m, tmp_row2_p(N_states, mo_num), tmp_row2_m(N_states, mo_num)
+!  double precision                :: tmp_row_ij_p (N_states, mo_num), tmp_row_ij_m (N_states, mo_num)
+!  double precision                :: hij_p, hij_m, tmp_row_ij2_p(N_states, mo_num), tmp_row_ij2_m(N_states, mo_num)
 !  double precision, allocatable   :: hijp_cache(:,:), hijm_cache(:,:)
 !
 !  integer, parameter              :: turn2(2) = (/2,1/)
@@ -979,13 +1145,13 @@ end subroutine get_d0
 !    p2 = p(2,ma)
 !    if(.not. bannedOrb(puti, mi)) then
 !
-!      call get_mo_two_e_integrals_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
-!      call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
 !
-!      tmp_row_p = 0d0
-!      tmp_row_m = 0d0
+!      tmp_row_ij_p = 0d0
+!      tmp_row_ij_m = 0d0
 !      do putj=1, hfix-1
 !        if(lbanned(putj, ma)) cycle
 !        if(banned(putj, puti,bant)) cycle
@@ -998,8 +1164,8 @@ end subroutine get_d0
 !          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
 !          !DIR$ LOOP COUNT AVG(4)
 !          do k=1,N_states
-!            tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k)
-!            tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k)
+!            tmp_row_ij_p(k,putj) = tmp_row_ij_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,putj) = tmp_row_ij_m(k,putj) + hij_m * coefs(k)
 !          enddo
 !        endif
 !      end do
@@ -1015,21 +1181,21 @@ end subroutine get_d0
 !          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
 !          !DIR$ LOOP COUNT AVG(4)
 !          do k=1,N_states
-!            tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k)
-!            tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k)
+!            tmp_row_ij_p(k,putj) = tmp_row_ij_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,putj) = tmp_row_ij_m(k,putj) + hij_m * coefs(k)
 !          enddo
 !        endif
 !      end do
 !
 !      if(ma == 1) then
-!        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_p(1:N_states,1:mo_num)
-!        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_m(1:N_states,1:mo_num)
+!        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_ij_p(1:N_states,1:mo_num)
+!        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_ij_m(1:N_states,1:mo_num)
 !      else
 !        do l=1,mo_num
 !          !DIR$ LOOP COUNT AVG(4)
 !          do k=1,N_states
-!            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_p(k,l)
-!            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_m(k,l)
+!            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_ij_p(k,l)
+!            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_ij_m(k,l)
 !          enddo
 !        enddo
 !      end if
@@ -1037,15 +1203,15 @@ end subroutine get_d0
 !
 !    !MOVE MI
 !    pfix = p(1,mi)
-!    tmp_row_p  = 0d0
-!    tmp_row_m  = 0d0
-!    tmp_row2_p = 0d0
-!    tmp_row2_m = 0d0
+!    tmp_row_ij_p  = 0d0
+!    tmp_row_ij_m  = 0d0
+!    tmp_row_ij2_p = 0d0
+!    tmp_row_ij2_m = 0d0
 !
-!    call get_mo_two_e_integrals_tc_int   (hfix, pfix, p1, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
-!    call get_mo_two_e_integrals_tc_int   (hfix, pfix, p2, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
-!    call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p1, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
-!    call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p2, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!    call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, pfix, p1, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!    call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, pfix, p2, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!    call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, pfix, p1, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!    call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, pfix, p2, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
 !
 !    putj = p1
 !    do puti=1,mo_num !HOT
@@ -1062,8 +1228,8 @@ end subroutine get_d0
 !          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
 !          !DIR$ LOOP COUNT AVG(4)
 !          do k=1,N_states
-!            tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k)
-!            tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k)
+!            tmp_row_ij_p(k,puti) = tmp_row_ij_p(k,puti) + hij_p * coefs(k)
+!            tmp_row_ij_m(k,puti) = tmp_row_ij_m(k,puti) + hij_m * coefs(k)
 !          enddo
 !        endif
 !      end if
@@ -1078,26 +1244,26 @@ end subroutine get_d0
 !          hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
 !          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
 !          do k=1,N_states
-!            tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k)
-!            tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k)
+!            tmp_row_ij2_p(k,puti) = tmp_row_ij2_p(k,puti) + hij_p * coefs(k)
+!            tmp_row_ij2_m(k,puti) = tmp_row_ij2_m(k,puti) + hij_m * coefs(k)
 !          enddo
 !        endif
 !      end if
 !    end do
 !
 !    if(mi == 1) then
-!      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_p (:,:)
-!      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row2_p(:,:)
-!      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_m (:,:)
-!      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row2_m(:,:)
+!      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_ij_p (:,:)
+!      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row_ij2_p(:,:)
+!      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_ij_m (:,:)
+!      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row_ij2_m(:,:)
 !    else
 !      do l=1,mo_num
 !        !DIR$ LOOP COUNT AVG(4)
 !        do k=1,N_states
-!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_p (k,l)
-!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row2_p(k,l)
-!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_m (k,l)
-!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row2_m(k,l)
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij_p (k,l)
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ij_m (k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ij2_m(k,l)
 !        enddo
 !      enddo
 !    end if
@@ -1111,13 +1277,13 @@ end subroutine get_d0
 !        p1 = p(turn3(1,i), ma)
 !        p2 = p(turn3(2,i), ma)
 !
-!        call get_mo_two_e_integrals_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
-!        call get_mo_two_e_integrals_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
-!        call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
-!        call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!        call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!        call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!        call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!        call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
 !
-!        tmp_row_p = 0d0
-!        tmp_row_m = 0d0
+!        tmp_row_ij_p = 0d0
+!        tmp_row_ij_m = 0d0
 !        do putj=1,hfix-1
 !          if(banned(putj,puti,1)) cycle
 !          if(lbanned(putj,ma)) cycle
@@ -1128,8 +1294,8 @@ end subroutine get_d0
 !          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
 !            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
 !            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
-!            tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:)
-!            tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:)
+!            tmp_row_ij_p(:,putj) = tmp_row_ij_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_ij_m(:,putj) = tmp_row_ij_m(:,putj) + hij_m * coefs(:)
 !          endif
 !        end do
 !        do putj=hfix+1,mo_num
@@ -1142,18 +1308,18 @@ end subroutine get_d0
 !          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
 !            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
 !            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
-!            tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:)
-!            tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:)
+!            tmp_row_ij_p(:,putj) = tmp_row_ij_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_ij_m(:,putj) = tmp_row_ij_m(:,putj) + hij_m * coefs(:)
 !          endif
 !        end do
 !
-!        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_p(:,:puti-1)
-!        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_m(:,:puti-1)
+!        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_ij_p(:,:puti-1)
+!        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_ij_m(:,:puti-1)
 !        do l=puti,mo_num
 !          !DIR$ LOOP COUNT AVG(4)
 !          do k=1,N_states
-!            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_p(k,l)
-!            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_m(k,l)
+!            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_ij_p(k,l)
+!            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_ij_m(k,l)
 !          enddo
 !        enddo
 !      end do
@@ -1162,15 +1328,15 @@ end subroutine get_d0
 !      pfix = p(1,mi)
 !      p1 = p(1,ma)
 !      p2 = p(2,ma)
-!      tmp_row_p  = 0d0
-!      tmp_row_m  = 0d0
-!      tmp_row2_p = 0d0
-!      tmp_row2_m = 0d0
+!      tmp_row_ij_p  = 0d0
+!      tmp_row_ij_m  = 0d0
+!      tmp_row_ij2_p = 0d0
+!      tmp_row_ij2_m = 0d0
 !
-!      call get_mo_two_e_integrals_tc_int   (hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tc_int   (hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tcdag_int(hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tcdag_int_map)
-!      call get_mo_two_e_integrals_tcdag_int(hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tcdag_int_map)
 !
 !      putj = p2
 !      do puti=1,mo_num
@@ -1186,8 +1352,8 @@ end subroutine get_d0
 !            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
 !            !DIR$ LOOP COUNT AVG(4)
 !            do k=1,N_states
-!              tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k)
-!              tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k)
+!              tmp_row_ij_p(k,puti) = tmp_row_ij_p(k,puti) + hij_p * coefs(k)
+!              tmp_row_ij_m(k,puti) = tmp_row_ij_m(k,puti) + hij_m * coefs(k)
 !            enddo
 !          endif
 !        end if
@@ -1200,28 +1366,28 @@ end subroutine get_d0
 !            hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
 !            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
 !            do k=1,N_states
-!              tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k)
-!              tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k)
+!              tmp_row_ij2_p(k,puti) = tmp_row_ij2_p(k,puti) + hij_p * coefs(k)
+!              tmp_row_ij2_m(k,puti) = tmp_row_ij2_m(k,puti) + hij_m * coefs(k)
 !            enddo
 !          endif
 !        end if
 !      end do
-!      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_p(:,:p2-1)
-!      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_m(:,:p2-1)
+!      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_ij_p(:,:p2-1)
+!      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_ij_m(:,:p2-1)
 !      do l=p2,mo_num
 !        !DIR$ LOOP COUNT AVG(4)
 !        do k=1,N_states
-!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_p(k,l)
-!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_m(k,l)
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_ij_p(k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_ij_m(k,l)
 !        enddo
 !      enddo
-!      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row2_p(:,:p1-1)
-!      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row2_m(:,:p1-1)
+!      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row_ij2_p(:,:p1-1)
+!      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row_ij2_m(:,:p1-1)
 !      do l=p1,mo_num
 !        !DIR$ LOOP COUNT AVG(4)
 !        do k=1,N_states
-!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row2_p(k,l)
-!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row2_m(k,l)
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_ij2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_ij2_m(k,l)
 !        enddo
 !      enddo
 !    end if
@@ -1280,8 +1446,8 @@ end subroutine get_d0
 !  double precision,  intent(in)   :: coefs(N_states)
 !  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
 !
-!  double precision, external      :: get_phase_bi, mo_two_e_integral
-!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int 
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_bi_ortho_tc_two_e_tc_int, get_mo_bi_ortho_tc_two_e_tcdag_int 
 !  integer, parameter              :: bant=1
 !  integer                         :: i, j, k, s, h1, h2, p1, p2, puti, putj
 !  logical                         :: ok
@@ -1299,8 +1465,8 @@ end subroutine get_d0
 !    do p1=1, mo_num
 !      if(bannedOrb(p1, 1)) cycle
 !
-!      call get_mo_two_e_integrals_tc_int   (p1, h2, h1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tcdag_int(p1, h2, h1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (p1, h2, h1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(p1, h2, h1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
 !
 !      do p2 = 1, mo_num
 !        if(bannedOrb(p2,2)) cycle
@@ -1329,10 +1495,10 @@ end subroutine get_d0
 !    do puti=1, mo_num
 !      if(bannedOrb(puti, sp)) cycle
 !
-!      call get_mo_two_e_integrals_tc_int   (puti, p2, p1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tc_int   (puti, p1, p2, mo_num, hijp_cache2, mo_integrals_tc_int_map   )
-!      call get_mo_two_e_integrals_tcdag_int(puti, p2, p1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
-!      call get_mo_two_e_integrals_tcdag_int(puti, p1, p2, mo_num, hijm_cache2, mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (puti, p2, p1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tc_int   (puti, p1, p2, mo_num, hijp_cache2, mo_integrals_tc_int_map   )
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(puti, p2, p1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!      call get_mo_bi_ortho_tc_two_es_tcdag_int(puti, p1, p2, mo_num, hijm_cache2, mo_integrals_tcdag_int_map)
 !
 !      do putj=puti+1, mo_num
 !        if(bannedOrb(putj, sp)) cycle
@@ -1344,10 +1510,10 @@ end subroutine get_d0
 !          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
 !        else
 !
-!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, puti, putj, mo_integrals_tc_int_map   ) &
-!                - get_mo_two_e_integral_tc_int   (p2, p1, puti, putj, mo_integrals_tc_int_map   )
-!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, puti, putj, mo_integrals_tcdag_int_map) &
-!                - get_mo_two_e_integral_tcdag_int(p2, p1, puti, putj, mo_integrals_tcdag_int_map)
+!          hij_p = get_mo_bi_ortho_tc_two_e_tc_int   (p1, p2, puti, putj, mo_integrals_tc_int_map   ) &
+!                - get_mo_bi_ortho_tc_two_e_tc_int   (p2, p1, puti, putj, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_bi_ortho_tc_two_e_tcdag_int(p1, p2, puti, putj, mo_integrals_tcdag_int_map) &
+!                - get_mo_bi_ortho_tc_two_e_tcdag_int(p2, p1, puti, putj, mo_integrals_tcdag_int_map)
 !
 !          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
 !
@@ -1391,7 +1557,7 @@ subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
 
   integer :: i, j, s, h1, h2, p1, p2, puti, putj
   double precision :: hij, phase
-  double precision, external :: get_phase_bi, mo_two_e_integral
+  double precision, external :: get_phase_bi
   logical :: ok
 
   integer :: bant
@@ -1411,7 +1577,7 @@ subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
           call i_h_j(gen, det, N_int, hij)
         else
           phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
-          hij = mo_two_e_integral(p1, p2, h1, h2) * phase
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * phase
         end if
         mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
       end do
@@ -1428,7 +1594,7 @@ subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
           call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
           call i_h_j(gen, det, N_int, hij)
         else
-          hij = (mo_two_e_integral(p1, p2, puti, putj) -  mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
         end if
         mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
       end do
@@ -1451,8 +1617,8 @@ subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
   double precision, intent(in)   :: coefs(N_states)
   double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
   integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
-  double precision               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
-  double precision, external     :: get_phase_bi, mo_two_e_integral
+  double precision               :: hij, tmp_row_ij(N_states, mo_num), tmp_row_ij2(N_states, mo_num), hji
+  double precision, external     :: get_phase_bi
   logical                        :: ok
 
   logical, allocatable           :: lbanned(:,:)
@@ -1489,51 +1655,51 @@ subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
     p1 = p(1,ma)
     p2 = p(2,ma)
     if(.not. bannedOrb(puti, mi)) then
-      tmp_row = 0d0
+      tmp_row_ij = 0d0
       do putj=1, hfix-1
         if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
-        hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
-        tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, putj, hfix)-mo_bi_ortho_tc_two_e(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+        tmp_row_ij(1:N_states,putj) = tmp_row_ij(1:N_states,putj) + hij * coefs(1:N_states)
       end do
       do putj=hfix+1, mo_num
         if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
-        hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
-        tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, hfix, putj)-mo_bi_ortho_tc_two_e(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+        tmp_row_ij(1:N_states,putj) = tmp_row_ij(1:N_states,putj) + hij * coefs(1:N_states)
       end do
 
       if(ma == 1) then
-        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row_ij(1:N_states,1:mo_num)
       else
-        mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num)
+        mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row_ij(1:N_states,1:mo_num)
       end if
     end if
 
     !MOVE MI
     pfix = p(1,mi)
-    tmp_row = 0d0
-    tmp_row2 = 0d0
+    tmp_row_ij = 0d0
+    tmp_row_ij2 = 0d0
     do puti=1,mo_num
       if(lbanned(puti,mi)) cycle
       !p1 fixed
       putj = p1
       if(.not. banned(putj,puti,bant)) then
-        hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
-        tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:)
+        hij = mo_bi_ortho_tc_two_e(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+        tmp_row_ij(:,puti) = tmp_row_ij(:,puti) + hij * coefs(:)
       end if
 
       putj = p2
       if(.not. banned(putj,puti,bant)) then
-        hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
-        tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:)
+        hij = mo_bi_ortho_tc_two_e(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+        tmp_row_ij2(:,puti) = tmp_row_ij2(:,puti) + hij * coefs(:)
       end if
     end do
 
     if(mi == 1) then
-      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
-      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row_ij(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row_ij2(:,:)
     else
-      mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:)
-      mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:)
+      mat(:,p1,:) = mat(:,p1,:) + tmp_row_ij(:,:)
+      mat(:,p2,:) = mat(:,p2,:) + tmp_row_ij2(:,:)
     end if
   else
     if(p(0,ma) == 3) then
@@ -1542,46 +1708,46 @@ subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
         puti = p(i, ma)
         p1 = p(turn3(1,i), ma)
         p2 = p(turn3(2,i), ma)
-        tmp_row = 0d0
+        tmp_row_ij = 0d0
         do putj=1,hfix-1
           if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
-          hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
-          tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, putj, hfix)-mo_bi_ortho_tc_two_e(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:)
         end do
         do putj=hfix+1,mo_num
           if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
-          hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
-          tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          hij = (mo_bi_ortho_tc_two_e(p1, p2, hfix, putj)-mo_bi_ortho_tc_two_e(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          tmp_row_ij(:,putj) = tmp_row_ij(:,putj) + hij * coefs(:)
         end do
 
-        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
-        mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:)
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row_ij(:,:puti-1)
+        mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row_ij(:,puti:)
       end do
     else
       hfix = h(1,mi)
       pfix = p(1,mi)
       p1 = p(1,ma)
       p2 = p(2,ma)
-      tmp_row = 0d0
-      tmp_row2 = 0d0
+      tmp_row_ij = 0d0
+      tmp_row_ij2 = 0d0
       do puti=1,mo_num
         if(lbanned(puti,ma)) cycle
         putj = p2
         if(.not. banned(puti,putj,1)) then
-          hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
-          tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:)
+          hij = mo_bi_ortho_tc_two_e(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+          tmp_row_ij(:,puti) = tmp_row_ij(:,puti) + hij * coefs(:)
         end if
 
         putj = p1
         if(.not. banned(puti,putj,1)) then
-          hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
-          tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:)
+          hij = mo_bi_ortho_tc_two_e(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+          tmp_row_ij2(:,puti) = tmp_row_ij2(:,puti) + hij * coefs(:)
         end if
       end do
-      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
-      mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:)
-      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
-      mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:)
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row_ij(:,:p2-1)
+      mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row_ij(:,p2:)
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row_ij2(:,:p1-1)
+      mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row_ij2(:,p1:)
     end if
   end if
   deallocate(lbanned)
@@ -1624,11 +1790,11 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
   double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
   integer, intent(in) :: h(0:2,2), p(0:4,2), sp
 
-  double precision, external :: get_phase_bi, mo_two_e_integral
+  double precision, external :: get_phase_bi
 
   integer :: i, j, tip, ma, mi, puti, putj
-  integer :: h1, h2, p1, p2, i1, i2
-  double precision :: hij, phase
+  integer :: h1, h2, p1, p2, i1, i2, mm
+  double precision :: hij, phase, hji
 
   integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
   integer, parameter :: turn2(2) = (/2, 1/)
@@ -1659,7 +1825,7 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
         h1 = h(1, ma)
         h2 = h(2, ma)
 
-        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
         if(ma == 1) then
           mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij
         else
@@ -1678,7 +1844,7 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
           if(banned(puti,putj,bant)) cycle
           p1 = p(turn2(i), 1)
 
-          hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int)
+          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int)
           mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
         end do
       end do
@@ -1698,7 +1864,7 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
         i2 = turn2d(2, i, j)
         p1 = p(i1, ma)
         p2 = p(i2, ma)
-        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int)
         mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
       end do
       end do
@@ -1712,7 +1878,7 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
         if(banned(puti,putj,1)) cycle
         p2 = p(i, ma)
 
-        hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int)
+        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int)
         mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij
       end do
     else ! tip == 4
@@ -1723,7 +1889,7 @@ subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p,
         p2 = p(2, mi)
         h1 = h(1, mi)
         h2 = h(2, mi)
-        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int)
+        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int)
         mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
       end if
     end if
diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
index 56e6bd14..e146efb1 100644
--- a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
+++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
@@ -130,7 +130,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
   PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
   PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
-  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc psi_det_sorted_tc
   PROVIDE psi_det_hii selection_weight pseudo_sym
   PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
   PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
diff --git a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
index e6b016fa..d351cc79 100644
--- a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
+++ b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
@@ -23,7 +23,7 @@ subroutine run_selection_slave(thread, iproc, energy)
   PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
   PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym
-  PROVIDE psi_selectors_coef_transp psi_det_sorted_tc weight_selection
+  PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc weight_selection
 
   call pt2_alloc(pt2_data,N_states)
 
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
index 6b93f663..9c695ba8 100644
--- a/src/cipsi_tc_bi_ortho/selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -81,7 +81,7 @@ subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock
   PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
   PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
   PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
-  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc
   PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
 
   PROVIDE banned_excitation
@@ -511,7 +511,7 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere
   integer(bit_kind)               :: phasemask(N_int,2)
 
 
-  PROVIDE psi_selectors_coef_transp psi_det_sorted_tc
+  PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc
   PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
 
 
@@ -564,29 +564,30 @@ subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, intere
         call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
         call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
 
-        call get_d3_h  ( det(1,1,i), bannedOrb, banned, mat         , mask, p, sp, psi_selectors_coef_transp (1, interesting(i)) )
-        call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_m, mat_p, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) &
-                       , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) )
+        perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
+        perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
+        do j=2,N_int
+          perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
+          perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
+        end do
+!        call get_d3_h  ( det(1,1,i), bannedOrb, banned, mat         , mask, p, sp, psi_selectors_coef_transp_tc (1, interesting(i)) )
+!        call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_m, mat_p, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) &
+!                       , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) )
 
-        !perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
-        !perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
-        !do j=2,N_int
-        !  perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
-        !  perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
-        !end do
-        !call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
-        !call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
-        !call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int)
-        !if(nt == 4) then
-        !  call get_d2 (det(1,1,i), phasemask, bannedOrb, banned, mat,          mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !  call get_pm2(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !elseif(nt == 3) then
-        !  call get_d1 (det(1,1,i), phasemask, bannedOrb, banned, mat         , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !  call get_pm1(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !else
-        !  call get_d0 (det(1,1,i), phasemask, bannedOrb, banned, mat         , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !  call get_pm0(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
-        !endif
+        call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
+        call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
+
+        call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int)
+        if(nt == 4) then
+          call get_d2  (det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+!          call get_pm2(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, interesting(i)))
+        elseif(nt == 3) then
+          call get_d1 (det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+!          call get_pm1(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, interesting(i)))
+        else
+          call get_d0 (det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, 1, interesting(i)))
+!          call get_pm0(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp_tc(1, interesting(i)))
+        endif
     elseif(nt == 4) then
         call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
         call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
@@ -775,17 +776,57 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
 
 !        call get_excitation_degree( HF_bitmask, det, degree, N_int)
 
-!        psi_h_alpha = mat_m(istate, p1, p2)
-!        alpha_h_psi = mat_p(istate, p1, p2)
+        double precision :: alpha_h_psi_tmp, psi_h_alpha_tmp
+        psi_h_alpha_tmp = mat_m(istate, p1, p2)
+        alpha_h_psi_tmp = mat_p(istate, p1, p2)
 !
         psi_h_alpha = 0.d0
         alpha_h_psi = 0.d0
         do iii = 1, N_det
           call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
           call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
-          psi_h_alpha += i_h_alpha * leigvec_tc_bi_orth(iii,1)
-          alpha_h_psi += alpha_h_i * reigvec_tc_bi_orth(iii,1) 
+!          psi_h_alpha += i_h_alpha * leigvec_tc_bi_orth(iii,1)
+!          alpha_h_psi += alpha_h_i * reigvec_tc_bi_orth(iii,1) 
+          psi_h_alpha += i_h_alpha * 1.d0
+          alpha_h_psi += alpha_h_i * 1.d0
         enddo
+!          print*,'---',p1,p2
+!          call debug_det(det,N_int)
+!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+!         if(dabs(psi_h_alpha - psi_h_alpha_tmp).gt.1.d-10 .or. dabs(alpha_h_psi - alpha_h_psi_tmp).gt.1.d-10)then
+!        if(dabs(psi_h_alpha_tmp*alpha_h_psi_tmp).gt.1.d+10)then
+        if(dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp).gt.1.d-10)then
+!          print*,'---'
+!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+         call debug_det(det,N_int)
+          print*,dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
+          print*,'-- Good '
+          print*,   psi_h_alpha,    alpha_h_psi  
+          print*,'-- bad '
+          print*,psi_h_alpha_tmp,alpha_h_psi_tmp  
+          print*,'-- details good'
+        do iii = 1, N_det
+          call get_excitation_degree( psi_det(1,1,iii), det, degree, N_int)
+          call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
+          call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
+          print*,iii,degree,i_h_alpha,alpha_h_i
+          
+        enddo
+!          if(dabs(psi_h_alpha*alpha_h_psi).gt.1.d-10)then
+!          print*,p1,p2
+!          print*,det(1,1), det(1,2)
+!          call debug_det(det,N_int)
+!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+!          print*, dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),& 
+!                   psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
+        stop
+          endif
+!          endif
+!          stop
+!        endif
 
         !if(alpha_h_psi*psi_h_alpha/delta_E.gt.1.d-10)then
         !  print*, 'E0,Hii,E_shift'
diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
index 33fe23fc..01cb57dd 100644
--- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
+++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
@@ -89,6 +89,7 @@ subroutine run_stochastic_cipsi
     call pt2_alloc(pt2_data, N_states)
     call pt2_alloc(pt2_data_err, N_states)
     call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+    stop
 
     N_iter += 1
 
diff --git a/src/determinants/fock_diag.irp.f b/src/determinants/fock_diag.irp.f
index a8ce33b8..c7c951b3 100644
--- a/src/determinants/fock_diag.irp.f
+++ b/src/determinants/fock_diag.irp.f
@@ -33,59 +33,59 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
   ! Occupied MOs
   do ii=1,elec_alpha_num
     i = occ(ii,1)
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
-    E0 = E0 + mo_one_e_integrals(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_one_e(i,i)
+    E0 = E0 + mo_bi_ortho_tc_one_e(i,i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
       if (i==j) cycle
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
-      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_bi_ortho_tc_two_e_jj_anti(i,j)
     enddo
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
-      E0 = E0 + mo_two_e_integrals_jj(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj(i,j)
+      E0 = E0 + mo_bi_ortho_tc_two_e_jj(i,j)
     enddo
   enddo
   do ii=1,elec_beta_num
     i = occ(ii,2)
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
-    E0 = E0 + mo_one_e_integrals(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_one_e(i,i)
+    E0 = E0 + mo_bi_ortho_tc_one_e(i,i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
       if (i==j) cycle
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
-      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_bi_ortho_tc_two_e_jj_anti(i,j)
     enddo
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj(i,j)
     enddo
   enddo
 
   ! Virtual MOs
   do i=1,mo_num
     if (fock_diag_tmp(1,i) /= 0.d0) cycle
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_one_e(i,i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
     enddo
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj(i,j)
     enddo
   enddo
   do i=1,mo_num
     if (fock_diag_tmp(2,i) /= 0.d0) cycle
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_one_e(i,i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
     enddo
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj(i,j)
     enddo
   enddo
 
diff --git a/src/fci_tc_bi/selectors.irp.f b/src/fci_tc_bi/selectors.irp.f
index 734c8ed0..502c2b7d 100644
--- a/src/fci_tc_bi/selectors.irp.f
+++ b/src/fci_tc_bi/selectors.irp.f
@@ -32,6 +32,7 @@ END_PROVIDER
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
 &BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_tc, (psi_selectors_size,2,N_states) ]
   implicit none
   BEGIN_DOC
   ! Determinants on which we apply <i|H|psi> for perturbation.
@@ -47,12 +48,17 @@ END_PROVIDER
   do k=1,N_states
     do i=1,N_det_selectors
       psi_selectors_coef(i,k) = psi_coef_sorted_tc_gen(i,k)
+!      psi_selectors_coef_tc(i,1,k) = psi_r_coef_sorted_bi_ortho(i,k)
+!      psi_selectors_coef_tc(i,2,k) = psi_l_coef_sorted_bi_ortho(i,k)
+      psi_selectors_coef_tc(i,1,k) = 1.d0
+      psi_selectors_coef_tc(i,2,k) = 1.d0
     enddo
   enddo
 
 END_PROVIDER
 
-BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+ BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp_tc, (N_states,2,psi_selectors_size) ]
   implicit none
   BEGIN_DOC
   ! Transposed psi_selectors
@@ -62,6 +68,8 @@ BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_sele
   do i=1,N_det_selectors
     do k=1,N_states
       psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k)
+      psi_selectors_coef_transp_tc(k,1,i) = psi_selectors_coef_tc(i,1,k)
+      psi_selectors_coef_transp_tc(k,2,i) = psi_selectors_coef_tc(i,2,k)
     enddo
   enddo
 END_PROVIDER
diff --git a/src/tc_bi_ortho/slater_tc_opt.irp.f b/src/tc_bi_ortho/slater_tc_opt.irp.f
index 8ab3388c..1d7e7a5f 100644
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@@ -42,3 +42,45 @@ subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree,
 end
 
 ! ---
+
+subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  !
+  ! htot = <key_j | H_tilde | key_i>, |key_j> being developed on the LEFT basis and |key_i> on the RIGHT basis
+  !
+  ! Only the total is returned, WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS
+  !
+  ! htot is zero beyond double excitations; nuclear_repulsion is added to the diagonal element
+  !
+  ! WARNING: Non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in)  :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2), key_j(Nint,2)
+  double precision,  intent(out) :: htot
+  integer                        :: n_exc
+
+  ! value returned for every pair that is neither identical, single nor double
+  htot = 0.d0
+
+  call get_excitation_degree(key_i, key_j, n_exc, Nint)
+
+  ! dispatch on the excitation degree between the two determinants
+  select case (n_exc)
+    case (0)
+      call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i, htot)
+      htot += nuclear_repulsion
+    case (1)
+      call single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+    case (2)
+      call double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  end select
+
+end
+
+! ---
diff --git a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
index c0b59969..68f647dd 100644
--- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
@@ -277,3 +277,197 @@ subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
 
 end
 
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot)
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$. WITHOUT ANY CONTRIBUTIONS FROM 3E TERMS
+  ! htot = hmono + htwoe, built from ref_tc_energy_1e and ref_tc_energy_2e by moving electrons from the holes to the particles of det_in
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: htot
+  double precision :: hmono,htwoe
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+  ! holes and particles of det_in with respect to ref_bitmask; nexc counts the holes of each spin
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  if (nexc(1)+nexc(2) == 0) then
+    ! det_in is ref_bitmask: same 1e+2e total as the general path (ref_tc_energy_tot, defined elsewhere, may hold other terms)
+    hmono = ref_tc_energy_1e
+    htwoe = ref_tc_energy_2e
+    htot  = hmono + htwoe
+    return
+  endif
+
+  ! lists of the particle and hole orbitals, for each spin
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  det_tmp = ref_bitmask
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e 
+  do ispin=1,2
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      !DIR$ FORCEINLINE
+      call ac_tc_operator_no_3e( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator_no_3e ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe
+end
+
+subroutine ac_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes the one- and two-body energy corresponding 
+  ! to the ADDITION of an electron in an orbital 'iorb' of spin 'ispin' 
+  ! onto a determinant 'key' (no three-body term is handled here).
+  !
+  ! In output, the determinant key is changed by the ADDITION of that electron 
+  ! and the quantities hmono,htwoe are INCREMENTED.
+  !
+  ! na and nb are the loop bounds over the orbitals of 'key' (as occupied BEFORE the addition)
+  ! of spin 'ispin' and of the other spin respectively; na is increased by one on exit, nb is unchanged.
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  double precision ::  direct_int, exchange_int
+  ! abort with an explicit message when iorb is outside the range 1..mo_num
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  ! list of the orbitals of key, taken BEFORE the bit of iorb is set
+  integer                        :: tmp(2)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+  ! k: index of the integer of the bit string that holds iorb, l: bit position inside that integer
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+
+  hmono = hmono + mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe = htwoe + mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+  na = na+1
+end
+
+subroutine a_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Routine that computes the one- and two-body energy corresponding 
+  ! to the REMOVAL of an electron from an orbital 'iorb' of spin 'ispin' 
+  ! of a determinant 'key' (no three-body term is handled here).
+  !
+  ! In output, the determinant key is changed by the REMOVAL of that electron 
+  ! and the corresponding contributions are SUBTRACTED from hmono,htwoe.
+  !
+  ! na is decreased by one before the sums, which then run over the first na (spin 'ispin') and
+  ! nb (other spin) orbitals of 'key' as occupied AFTER the removal; nb is unchanged.
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+  
+  double precision  :: direct_int, exchange_int
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: other_spin
+  integer                        :: k,l,i,jj,mm,j,m
+  integer                        :: tmp(2)
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+  ! k: index of the integer of the bit string that holds iorb, l: bit position inside that integer
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  other_spin = iand(ispin,1)+1
+  ! list of the orbitals of key, taken AFTER the bit of iorb is cleared (iorb is not in the sums below)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  hmono = hmono - mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! Same spin
+  do i=1,na
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj_anti(occ(i,ispin),iorb)
+  enddo
+
+  ! Opposite spin
+  do i=1,nb
+    htwoe= htwoe- mo_bi_ortho_tc_two_e_jj(occ(i,other_spin),iorb)
+  enddo
+
+end
+
diff --git a/src/tc_bi_ortho/slater_tc_opt_double.irp.f b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
index 9d33523b..d094d76e 100644
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@@ -419,3 +419,58 @@ subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
  enddo
 end
 
+
+subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
+  !
+  ! Nint         : dimension of the determinants key_j(Nint,2) and key_i(Nint,2)
+  !
+  ! key_j, key_i : the two determinants of the matrix element
+  !
+  ! htot         : matrix element; zero when the two determinants do not differ by exactly a double excitation
+  !
+  ! htot is phase * mo_bi_ortho_tc_two_e(p2,p1,h2,h1), minus phase * mo_bi_ortho_tc_two_e(p1,p2,h2,h1)
+  ! when both excited electrons have the same spin: no one-body integral enters this routine.
+  !
+  ! WARNING: Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: htot
+  integer                       :: degree, exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: htwoe, phase
+
+  ! value returned when the determinants are not doubly excited
+  htot = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.2)then
+   return
+  endif
+
+  ! indices (h1,p1,h2,p2), spins (s1,s2) and phase of the double excitation between key_i and key_j
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(s1.ne.s2)then
+   ! opposite spin two-body 
+    htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1) 
+  else
+   ! same spin two-body 
+   ! direct terms 
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)  
+   ! exchange terms 
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1) 
+  endif
+  htwoe  *= phase
+  htot    =  htwoe 
+
+end
+
diff --git a/src/tc_bi_ortho/slater_tc_opt_single.irp.f b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
index ae41591a..7cff3c73 100644
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@@ -458,3 +458,115 @@ BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num)
 
 END_PROVIDER
 
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS 
+  !
+  ! Nint         : dimension of the determinants key_j(Nint,2) and key_i(Nint,2)
+  !
+  ! key_j, key_i : the two determinants of the matrix element
+  !
+  ! htot         : matrix element; zero when the two determinants do not differ by exactly a single excitation
+  !
+  ! The excitation between key_i and key_j is decoded here (h1, p1, s1, phase) and the value is
+  ! computed by get_single_excitation_from_fock_tc_no_3e.
+  !
+  ! WARNING: Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) ::  htot
+  double precision              :: hmono, htwoe, phase
+  integer                       :: degree, exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+
+  ! value returned when the determinants are not singly excited
+  htot = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  ! h2 and p2 are filled by decode_exc but are not used for a single excitation
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  ! hmono and htwoe are outputs of the helper that are not returned to the caller: only htot is
+  call get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,htot)
+
+end
+
+
+subroutine get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h,p,spin,phase,hmono,htwoe,htot)
+ use bitmasks
+ implicit none
+ ! One- and two-electron (no three-electron term) matrix element for the single excitation given by (h, p, spin, phase).
+ ! hmono = phase * mo_bi_ortho_tc_one_e(p,h)
+ ! htwoe = phase * ( fock_op_2_e_tc_closed_shell(p,h) + corrections for the orbitals in which key_i differs
+ !         from ref_closed_shell_bitmask ) ; htot = hmono + htwoe. key_j is declared but never read here.
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2)
+ double precision, intent(out) :: hmono,htwoe,htot
+ integer(bit_kind) :: differences(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_partcl(2)
+ integer :: i0,i
+ double precision :: buffer_c(mo_num),buffer_x(mo_num)
+ do i=1, mo_num
+   buffer_c(i) = tc_2e_3idx_coulomb_integrals(i,p,h)
+   buffer_x(i) = tc_2e_3idx_exchange_integrals(i,p,h)
+ enddo
+ do i = 1, N_int
+  differences(i,1) = xor(key_i(i,1),ref_closed_shell_bitmask(i,1))
+  differences(i,2) = xor(key_i(i,2),ref_closed_shell_bitmask(i,2))
+  hole(i,1) = iand(differences(i,1),ref_closed_shell_bitmask(i,1))
+  hole(i,2) = iand(differences(i,2),ref_closed_shell_bitmask(i,2))
+  partcl(i,1) = iand(differences(i,1),key_i(i,1))
+  partcl(i,2) = iand(differences(i,2),key_i(i,2))
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_occ_ab_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_occ_ab_partcl, N_int)
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)
+ ! holes (set in ref_closed_shell_bitmask, not in key_i) :: direct terms, both spins, subtracted
+ do i0 = 1, n_occ_ab_hole(1)
+  i = occ_hole(i0,1)
+  htwoe -= buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_hole(2)
+  i = occ_hole(i0,2)
+  htwoe -= buffer_c(i)
+ enddo
+ ! holes :: exchange terms, only for the spin of the excitation, added back
+ do i0 = 1, n_occ_ab_hole(spin)
+  i = occ_hole(i0,spin)
+  htwoe += buffer_x(i)
+ enddo
+ ! particles (set in key_i, not in ref_closed_shell_bitmask) :: direct terms, both spins, added
+ do i0 = 1, n_occ_ab_partcl(1)
+  i = occ_partcl(i0,1)
+  htwoe += buffer_c(i)
+ enddo
+ do i0 = 1, n_occ_ab_partcl(2)
+  i = occ_partcl(i0,2)
+  htwoe += buffer_c(i)
+ enddo
+ ! particles :: exchange terms, only for the spin of the excitation, subtracted
+ do i0 = 1, n_occ_ab_partcl(spin)
+  i = occ_partcl(i0,spin)
+  htwoe -= buffer_x(i)
+ enddo
+ htwoe = htwoe * phase
+ hmono = hmono * phase
+ htot  = htwoe + hmono 
+end
+

From c0f60753c23d73a360409c137ad909ea99017e9a Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Thu, 26 Jan 2023 11:29:36 +0100
Subject: [PATCH 56/68] moved the point charges definition in nuclei and added
 the interaction between the charges and nuclei

---
 src/ao_one_e_ints/EZFIO.cfg                   |  23 --
 src/ao_one_e_ints/point_charges.irp.f         | 272 ------------------
 src/ao_one_e_ints/pot_pt_charges.irp.f        | 108 +++++++
 src/cas_based_on_top/two_body_dens_rout.irp.f |   2 +-
 src/cipsi_tc_bi_ortho/get_d.irp.f             |  10 +-
 src/cipsi_tc_bi_ortho/selection.irp.f         |   6 +
 src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f  |   7 +-
 src/nuclei/EZFIO.cfg                          |  24 ++
 src/nuclei/nuclei.irp.f                       |   3 +
 src/nuclei/point_charges.irp.f                | 209 ++++++++++++++
 .../write_pt_charges.py                       |   4 +-
 11 files changed, 362 insertions(+), 306 deletions(-)
 delete mode 100644 src/ao_one_e_ints/point_charges.irp.f
 create mode 100644 src/ao_one_e_ints/pot_pt_charges.irp.f
 create mode 100644 src/nuclei/point_charges.irp.f
 rename src/{ao_one_e_ints => nuclei}/write_pt_charges.py (94%)

diff --git a/src/ao_one_e_ints/EZFIO.cfg b/src/ao_one_e_ints/EZFIO.cfg
index 262301e0..8d4fff57 100644
--- a/src/ao_one_e_ints/EZFIO.cfg
+++ b/src/ao_one_e_ints/EZFIO.cfg
@@ -106,26 +106,3 @@ interface: ezfio,provider,ocaml
 default: 1.e-15
 ezfio_name: threshold_ao
 
-[n_pts_charge]
-type: integer
-doc: Number of point charges to be added to the potential
-interface: ezfio
-default: 0
-
-[pts_charge_z]
-type: double precision
-doc: Charge associated to each point charge
-interface: ezfio
-size: (ao_one_e_ints.n_pts_charge)
-
-[pts_charge_coord]
-type: double precision
-doc: Coordinate of each point charge.
-interface: ezfio
-size: (ao_one_e_ints.n_pts_charge,3)
-
-[point_charges]
-type: logical
-doc: If |true|, point charges (see ao_one_e_ints/write_pt_charges.py) are added to the one-electron potential
-interface: ezfio,provider,ocaml
-default: False
diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f
deleted file mode 100644
index c038458d..00000000
--- a/src/ao_one_e_ints/point_charges.irp.f
+++ /dev/null
@@ -1,272 +0,0 @@
-
-! ---
-
-
-BEGIN_PROVIDER [ integer, n_pts_charge  ]
-  implicit none
-  BEGIN_DOC
-! Number of point charges to be added to the potential
-  END_DOC
-
-  logical                        :: has
-  PROVIDE ezfio_filename
-  if (mpi_master) then
-    
-    call ezfio_has_ao_one_e_ints_n_pts_charge(has)
-    if (has) then
-      write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_n_pts_charge(n_pts_charge)
-    else
-      print *, 'ao_one_e_ints/n_pts_charge not found in EZFIO file'
-      stop 1
-    endif
-  endif
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read n_pts_charge with MPI'
-    endif
-  IRP_ENDIF
-
-  call write_time(6)
-
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
-
-  BEGIN_DOC
-  ! Charge associated to each point charge. 
-  END_DOC
-
-  implicit none
-  logical :: exists
-
-  PROVIDE ezfio_filename
-
-  if (mpi_master) then
-    call ezfio_has_ao_one_e_ints_pts_charge_z(exists)
-  endif
-
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read pts_charge_z with MPI'
-    endif
-  IRP_ENDIF
-
-  if (exists) then
-
-    if (mpi_master) then
-      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_pts_charge_z(pts_charge_z)
-      IRP_IF MPI
-        call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-        if (ierr /= MPI_SUCCESS) then
-          stop 'Unable to read pts_charge_z with MPI'
-        endif
-      IRP_ENDIF
-    endif
-
-  else
- 
-    integer :: i
-    do i = 1, n_pts_charge
-      pts_charge_z(i) = 0.d0
-    enddo
-
-  endif
- print*,'Point charges '
- do i = 1, n_pts_charge
-  print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
- enddo
-
-END_PROVIDER
-
-
-BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
-
-  BEGIN_DOC
-  ! Coordinates of each point charge. 
-  END_DOC
-
-  implicit none
-  logical :: exists
-
-  PROVIDE ezfio_filename
-
-  if (mpi_master) then
-    call ezfio_has_ao_one_e_ints_pts_charge_coord(exists)
-  endif
-
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read pts_charge_coord with MPI'
-    endif
-  IRP_ENDIF
-
-  if (exists) then
-
-    if (mpi_master) then
-     double precision, allocatable  :: buffer(:,:)
-     allocate (buffer(n_pts_charge,3))
-      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_pts_charge_coord(buffer)
-      integer :: i,j
-      do i=1,3
-        do j=1,n_pts_charge
-          pts_charge_coord(j,i) = buffer(j,i)
-        enddo
-      enddo
-     deallocate(buffer)
-      IRP_IF MPI
-        call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-        if (ierr /= MPI_SUCCESS) then
-          stop 'Unable to read pts_charge_coord with MPI'
-        endif
-      IRP_ENDIF
-    endif
-
-  else
- 
-    do i = 1, n_pts_charge
-      pts_charge_coord(i,:) = 0.d0
-    enddo
-
-  endif
- print*,'Coordinates for the point charges '
- do i = 1, n_pts_charge
-  write(*,'(I3,X,3(F16.8,X))'),i,pts_charge_coord(i,1:3)
- enddo
-
-END_PROVIDER
-
-! ---
-
-BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
-
-  BEGIN_DOC
-  !  Point charge-electron interaction, in the |AO| basis set.
-  !
-  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
-  !
-  !  These integrals also contain the pseudopotential integrals.
-  END_DOC
-
-  implicit none
-  integer          :: num_A, num_B, power_A(3), power_B(3)
-  integer          :: i, j, k, l, n_pt_in, m
-  double precision :: alpha, beta
-  double precision :: A_center(3),B_center(3),C_center(3)
-  double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
-
-  ao_integrals_pt_chrg = 0.d0
-
-!  if (read_ao_integrals_pt_chrg) then
-!
-!    call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
-!    print *,  'AO N-e integrals read from disk'
-!
-!  else
-
-!    if(use_cosgtos) then
-!      !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
-!
-!      do j = 1, ao_num
-!        do i = 1, ao_num
-!          ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
-!        enddo
-!      enddo
-!
-!    else
-
-      !$OMP PARALLEL                                                   &
-          !$OMP DEFAULT (NONE)                                         &
-          !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
-          !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
-          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
-          !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
-
-      n_pt_in = n_pt_max_integrals
-
-      !$OMP DO SCHEDULE (dynamic)
-
-      do j = 1, ao_num
-        num_A = ao_nucl(j)
-        power_A(1:3)= ao_power(j,1:3)
-        A_center(1:3) = nucl_coord(num_A,1:3)
-
-        do i = 1, ao_num
-
-          num_B = ao_nucl(i)
-          power_B(1:3)= ao_power(i,1:3)
-          B_center(1:3) = nucl_coord(num_B,1:3)
-
-          do l=1,ao_prim_num(j)
-            alpha = ao_expo_ordered_transp(l,j)
-
-            do m=1,ao_prim_num(i)
-              beta = ao_expo_ordered_transp(m,i)
-
-              double precision               :: c, c1
-              c = 0.d0
-
-              do  k = 1, n_pts_charge
-                double precision               :: Z
-                Z = pts_charge_z(k)
-
-                C_center(1:3) = pts_charge_coord(k,1:3)
-
-                c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
-                                 , alpha, beta, C_center, n_pt_in )
-
-                c = c + Z * c1
-
-              enddo
-              ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
-                  + ao_coef_normalized_ordered_transp(l,j)             &
-                  * ao_coef_normalized_ordered_transp(m,i) * c
-            enddo
-          enddo
-        enddo
-      enddo
-
-    !$OMP END DO
-    !$OMP END PARALLEL
-
-!    endif
-
-
-!    IF(do_pseudo) THEN
-!       ao_integrals_pt_chrg += ao_pseudo_integrals
-!    ENDIF
-
-!  endif
-
-
-!  if (write_ao_integrals_pt_chrg) then
-!    call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
-!    print *,  'AO N-e integrals written to disk'
-!  endif
-
-END_PROVIDER
diff --git a/src/ao_one_e_ints/pot_pt_charges.irp.f b/src/ao_one_e_ints/pot_pt_charges.irp.f
new file mode 100644
index 00000000..93f1acff
--- /dev/null
+++ b/src/ao_one_e_ints/pot_pt_charges.irp.f
@@ -0,0 +1,108 @@
+
+BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
+
+  BEGIN_DOC
+  !  Point charge-electron interaction, in the |AO| basis set.
+  !
+  !  :math:`\langle \chi_i | -\sum_k \frac{q_k}{|r-R_k|} | \chi_j \rangle`
+  !
+  !  with q_k = pts_charge_z(k) and R_k = pts_charge_coord(k,1:3); the minus sign is the convention for electrons.
+  END_DOC
+
+  implicit none
+  integer          :: num_A, num_B, power_A(3), power_B(3)
+  integer          :: i, j, k, l, n_pt_in, m
+  double precision :: alpha, beta
+  double precision :: A_center(3),B_center(3),C_center(3)
+  double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
+
+  ao_integrals_pt_chrg = 0.d0
+
+!  if (read_ao_integrals_pt_chrg) then
+!
+!    call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals read from disk'
+!
+!  else
+
+!    if(use_cosgtos) then
+!      !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
+!
+!      do j = 1, ao_num
+!        do i = 1, ao_num
+!          ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
+!        enddo
+!      enddo
+!
+!    else
+
+      !$OMP PARALLEL                                                   &
+          !$OMP DEFAULT (NONE)                                         &
+          !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
+          !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
+          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
+          !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
+
+      n_pt_in = n_pt_max_integrals
+
+      !$OMP DO SCHEDULE (dynamic)
+
+      do j = 1, ao_num
+        num_A = ao_nucl(j)
+        power_A(1:3)= ao_power(j,1:3)
+        A_center(1:3) = nucl_coord(num_A,1:3)
+
+        do i = 1, ao_num
+
+          num_B = ao_nucl(i)
+          power_B(1:3)= ao_power(i,1:3)
+          B_center(1:3) = nucl_coord(num_B,1:3)
+
+          do l=1,ao_prim_num(j)
+            alpha = ao_expo_ordered_transp(l,j)
+
+            do m=1,ao_prim_num(i)
+              beta = ao_expo_ordered_transp(m,i)
+              ! c accumulates, for the pair of primitives (l,m), the contribution of all the point charges
+              double precision               :: c, c1
+              c = 0.d0
+
+              do  k = 1, n_pts_charge
+                double precision               :: Z
+                Z = pts_charge_z(k)
+
+                C_center(1:3) = pts_charge_coord(k,1:3)
+
+                c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
+                                 , alpha, beta, C_center, n_pt_in )
+                ! minus sign: sign convention stated in the documentation of this provider
+                c = c - Z * c1
+
+              enddo
+              ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
+                  + ao_coef_normalized_ordered_transp(l,j)             &
+                  * ao_coef_normalized_ordered_transp(m,i) * c
+            enddo
+          enddo
+        enddo
+      enddo
+
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+!    endif
+
+
+!    IF(do_pseudo) THEN
+!       ao_integrals_pt_chrg += ao_pseudo_integrals
+!    ENDIF
+
+!  endif
+
+
+!  if (write_ao_integrals_pt_chrg) then
+!    call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
+!    print *,  'AO N-e integrals written to disk'
+!  endif
+
+END_PROVIDER
diff --git a/src/cas_based_on_top/two_body_dens_rout.irp.f b/src/cas_based_on_top/two_body_dens_rout.irp.f
index 4a57a868..5d066831 100644
--- a/src/cas_based_on_top/two_body_dens_rout.irp.f
+++ b/src/cas_based_on_top/two_body_dens_rout.irp.f
@@ -132,7 +132,7 @@ end
 subroutine give_n2_cas(r1,r2,istate,n2_psi)
  implicit none
  BEGIN_DOC
-! returns mu(r), f_psi, n2_psi for a general cas wave function
+! returns n2_psi for a general cas wave function
  END_DOC
  integer, intent(in) :: istate
  double precision, intent(in)  :: r1(3),r2(3)
diff --git a/src/cipsi_tc_bi_ortho/get_d.irp.f b/src/cipsi_tc_bi_ortho/get_d.irp.f
index 58b1972a..7fdc5e12 100644
--- a/src/cipsi_tc_bi_ortho/get_d.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d.irp.f
@@ -523,10 +523,10 @@ subroutine get_d1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, s
 !    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
 !    call get_mo_bi_ortho_tc_two_es(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
     do mm = 1, mo_num
-     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
-     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
      hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1)
      hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2)
+     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
+     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
     enddo
     putj = p1
     do puti = 1, mo_num !HOT
@@ -800,7 +800,7 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, s
       if(bannedOrb(p1, 1)) cycle
 !      call get_mo_bi_ortho_tc_two_es(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
       do mm =1, mo_num
-       hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
        hji_cache1(mm) = mo_bi_ortho_tc_two_e(h2,h1,mm,p1)
       enddo
       do p2=1, mo_num
@@ -811,8 +811,8 @@ subroutine get_d0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, s
           call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
 !          call i_h_j(gen, det, N_int, hij)
            !!! GUESS ON THE ORDER 
-          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
-          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hji)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hij)
         else
 !          print*,'ELSE '
           phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
index 9c695ba8..659e50a8 100644
--- a/src/cipsi_tc_bi_ortho/selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -807,11 +807,17 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
           print*,'-- bad '
           print*,psi_h_alpha_tmp,alpha_h_psi_tmp  
           print*,'-- details good'
+        double precision :: accu_1, accu_2
+        accu_1 = 0.d0
+        accu_2 = 0.d0
         do iii = 1, N_det
           call get_excitation_degree( psi_det(1,1,iii), det, degree, N_int)
           call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
           call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
           print*,iii,degree,i_h_alpha,alpha_h_i
+          accu_1 += i_h_alpha
+          accu_2 += alpha_h_i
+          print*,accu_1,accu_2
           
         enddo
 !          if(dabs(psi_h_alpha*alpha_h_psi).gt.1.d-10)then
diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
index 01cb57dd..dc8a4c07 100644
--- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
+++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
@@ -44,9 +44,10 @@ subroutine run_stochastic_cipsi
   pt2_data % overlap= 0.d0
   pt2_data % variance = huge(1.e0)
 
-  if (s2_eig) then
-    call make_s2_eigenfunction
-  endif
+  !!!! WARNING  !!!! SEEMS TO BE PROBLEM WTH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION
+!  if (s2_eig) then
+!    call make_s2_eigenfunction
+!  endif
   print_pt2 = .False.
   call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
 !  call routine_save_right
diff --git a/src/nuclei/EZFIO.cfg b/src/nuclei/EZFIO.cfg
index 34c27c46..bd25e38a 100644
--- a/src/nuclei/EZFIO.cfg
+++ b/src/nuclei/EZFIO.cfg
@@ -37,3 +37,27 @@ type: logical
 doc: If true, the calculation uses periodic boundary conditions
 interface: ezfio, provider, ocaml
 default: false
+
+[n_pts_charge]
+type: integer
+doc: Number of point charges to be added to the potential
+interface: ezfio
+default: 0
+
+[pts_charge_z]
+type: double precision
+doc: Charge associated to each point charge
+interface: ezfio
+size: (nuclei.n_pts_charge)
+
+[pts_charge_coord]
+type: double precision
+doc: Coordinate of each point charge.
+interface: ezfio
+size: (nuclei.n_pts_charge,3)
+
+[point_charges]
+type: logical
+doc: If |true|, point charges (see nuclei/write_pt_charges.py) are added to the one-electron potential
+interface: ezfio,provider,ocaml
+default: False
diff --git a/src/nuclei/nuclei.irp.f b/src/nuclei/nuclei.irp.f
index c1b5f52f..f765e107 100644
--- a/src/nuclei/nuclei.irp.f
+++ b/src/nuclei/nuclei.irp.f
@@ -205,6 +205,9 @@ BEGIN_PROVIDER [ double precision, nuclear_repulsion ]
        enddo
      enddo
      nuclear_repulsion *= 0.5d0
+     if(point_charges)then
+      nuclear_repulsion += pt_chrg_nuclei_repulsion + pt_chrg_repulsion
+     endif
    end if
 
    call write_time(6)
diff --git a/src/nuclei/point_charges.irp.f b/src/nuclei/point_charges.irp.f
new file mode 100644
index 00000000..86038742
--- /dev/null
+++ b/src/nuclei/point_charges.irp.f
@@ -0,0 +1,209 @@
+! ---
+
+
+BEGIN_PROVIDER [ integer, n_pts_charge  ]
+  implicit none
+  BEGIN_DOC
+! Number of point charges to be added to the potential.
+! The master rank reads it from the EZFIO file; under MPI it is then broadcast.
+  END_DOC
+
+  logical                        :: found
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+    call ezfio_has_nuclei_n_pts_charge(found)
+    ! The value is mandatory: abort when it is absent from the EZFIO file.
+    if (.not. found) then
+      print *, 'nuclei/n_pts_charge not found in EZFIO file'
+      stop 1
+    endif
+    write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
+    call ezfio_get_nuclei_n_pts_charge(n_pts_charge)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_pts_charge with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
+
+  BEGIN_DOC
+  ! Charge associated to each point charge.
+  ! Read by the master rank from the EZFIO file; set to zero when absent.
+  END_DOC
+
+  implicit none
+  logical :: exists
+  integer :: i
+
+  PROVIDE ezfio_filename
+
+  exists = .False.
+  if (mpi_master) then
+    call ezfio_has_nuclei_pts_charge_z(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    ! Only the master queried EZFIO: share the flag so all ranks take the same branch.
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_z with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+    if (mpi_master) then
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
+      call ezfio_get_nuclei_pts_charge_z(pts_charge_z)
+    endif
+    IRP_IF MPI
+      ! MPI_BCAST is collective: it is called by every rank, not only the master.
+      call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read pts_charge_z with MPI'
+      endif
+    IRP_ENDIF
+  else
+    ! No charges stored in the EZFIO file: default to zero.
+    pts_charge_z = 0.d0
+  endif
+
+ print*,'Point charges '
+ do i = 1, n_pts_charge
+  print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
+ enddo
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
+
+  BEGIN_DOC
+  ! Coordinates of each point charge.
+  ! Read by the master rank from the EZFIO file; set to zero when absent.
+  END_DOC
+
+  implicit none
+  logical :: exists
+  integer :: i
+  double precision, allocatable  :: buffer(:,:)
+
+  PROVIDE ezfio_filename
+
+  exists = .False.
+  if (mpi_master) then
+    call ezfio_has_nuclei_pts_charge_coord(exists)
+  endif
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    ! Only the master queried EZFIO: share the flag so all ranks take the same branch.
+    call MPI_BCAST(exists, 1, MPI_LOGICAL, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read pts_charge_coord with MPI'
+    endif
+  IRP_ENDIF
+
+  if (exists) then
+    if (mpi_master) then
+      allocate (buffer(n_pts_charge,3))
+      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
+      call ezfio_get_nuclei_pts_charge_coord(buffer)
+      pts_charge_coord(1:n_pts_charge,1:3) = buffer(1:n_pts_charge,1:3)
+      deallocate(buffer)
+    endif
+
+    IRP_IF MPI
+      ! MPI_BCAST is collective: it is called by every rank, not only the master.
+      ! The array holds 3 coordinates per point charge, hence 3*n_pts_charge values.
+      call MPI_BCAST(pts_charge_coord, 3*n_pts_charge, MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        stop 'Unable to read pts_charge_coord with MPI'
+      endif
+    IRP_ENDIF
+
+  else
+
+    ! No coordinates stored in the EZFIO file: default to the origin.
+    pts_charge_coord = 0.d0
+
+  endif
+ print*,'Coordinates for the point charges '
+ do i = 1, n_pts_charge
+  write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3)
+ enddo
+
+END_PROVIDER
+
+! ---
+BEGIN_PROVIDER [ double precision, pt_chrg_repulsion]
+ implicit none
+ BEGIN_DOC
+ ! Sum over distinct pairs (a<b) of point charges of q_a*q_b/|r_a - r_b|
+ END_DOC
+ integer :: ia, ib
+ double precision               :: q_a, q_b, dr(3), r_ab
+ pt_chrg_repulsion = 0.d0
+ do  ia = 1, n_pts_charge
+   q_a = pts_charge_z(ia)
+   ! Inner loop starts at ia+1 so that each pair is counted once
+   do  ib = ia+1, n_pts_charge
+     q_b = pts_charge_z(ib)
+     dr(1:3) = pts_charge_coord(ia,1:3) - pts_charge_coord(ib,1:3)
+     r_ab = dr(1)*dr(1) + dr(2)*dr(2) + dr(3)*dr(3)
+     r_ab = dsqrt(r_ab)
+     pt_chrg_repulsion += q_a*q_b/r_ab
+   enddo
+ enddo
+ print*,'Repulsion of point charges '
+ print*,'pt_chrg_repulsion = ',pt_chrg_repulsion
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, pt_chrg_nuclei_repulsion]
+ implicit none
+ BEGIN_DOC
+ ! Sum over every (point charge, nucleus) pair of q*Z/|r_q - R_Z|
+ END_DOC
+ integer :: ipc, inuc
+ double precision               :: q_pc, z_nuc, dr(3), r_sep
+ pt_chrg_nuclei_repulsion = 0.d0
+ do  ipc = 1, n_pts_charge
+   q_pc = pts_charge_z(ipc)
+   ! Every nucleus interacts with the current point charge
+   do  inuc = 1, nucl_num
+     z_nuc = nucl_charge(inuc)
+     dr(1:3) = pts_charge_coord(ipc,1:3) - nucl_coord(inuc,1:3)
+     r_sep = dr(1)*dr(1) + dr(2)*dr(2) + dr(3)*dr(3)
+     r_sep = dsqrt(r_sep)
+     pt_chrg_nuclei_repulsion += q_pc*z_nuc/r_sep
+   enddo
+ enddo
+ print*,'Repulsion between point charges and nuclei'
+ print*,'pt_chrg_nuclei_repulsion = ',pt_chrg_nuclei_repulsion
+END_PROVIDER 
+
diff --git a/src/ao_one_e_ints/write_pt_charges.py b/src/nuclei/write_pt_charges.py
similarity index 94%
rename from src/ao_one_e_ints/write_pt_charges.py
rename to src/nuclei/write_pt_charges.py
index d4b6d251..d722faa8 100755
--- a/src/ao_one_e_ints/write_pt_charges.py
+++ b/src/nuclei/write_pt_charges.py
@@ -13,11 +13,11 @@ def zip_in_ezfio(ezfio,tmp):
   cmdzip="gzip -c "+tmp+" > "+tmpzip
   os.system(cmdzip)
   os.system("rm "+tmp)
-  cmdmv="mv "+tmpzip+" "+EZFIO+"/ao_one_e_ints/"+tmpzip
+  cmdmv="mv "+tmpzip+" "+EZFIO+"/nuclei/"+tmpzip
   os.system(cmdmv)
 
 def mv_in_ezfio(ezfio,tmp):
-  cmdmv="mv "+tmp+" "+EZFIO+"/ao_one_e_ints/"+tmp
+  cmdmv="mv "+tmp+" "+EZFIO+"/nuclei/"+tmp
   os.system(cmdmv)
 
 

From da344f6bcd0fff57b1f6d2200f9833a54ef80247 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Thu, 26 Jan 2023 12:46:01 +0100
Subject: [PATCH 57/68] Fix documentation

---
 docs/source/users_guide/qp_convert_output_to_ezfio.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/users_guide/qp_convert_output_to_ezfio.rst b/docs/source/users_guide/qp_convert_output_to_ezfio.rst
index 171e2796..035ced4a 100644
--- a/docs/source/users_guide/qp_convert_output_to_ezfio.rst
+++ b/docs/source/users_guide/qp_convert_output_to_ezfio.rst
@@ -25,7 +25,7 @@ Usage
 
 .. note::
 
-   All the parameters of the wave functgion need to be presente in the
+   All the parameters of the wave function need to be present in the
    output file : complete description of the |AO| basis set, full set of
    molecular orbitals, etc.
 

From 1240b324ce731596f972e901e0bb617c849eac22 Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 27 Jan 2023 18:01:57 +0100
Subject: [PATCH 58/68] corrected big bug introduced before in
 determinants/fock_diag.irp.f

---
 src/determinants/fock_diag.irp.f | 34 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/src/determinants/fock_diag.irp.f b/src/determinants/fock_diag.irp.f
index c7c951b3..a8ce33b8 100644
--- a/src/determinants/fock_diag.irp.f
+++ b/src/determinants/fock_diag.irp.f
@@ -33,59 +33,59 @@ subroutine build_fock_tmp(fock_diag_tmp,det_ref,Nint)
   ! Occupied MOs
   do ii=1,elec_alpha_num
     i = occ(ii,1)
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_one_e(i,i)
-    E0 = E0 + mo_bi_ortho_tc_one_e(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
       if (i==j) cycle
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
-      E0 = E0 + 0.5d0*mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
     enddo
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj(i,j)
-      E0 = E0 + mo_bi_ortho_tc_two_e_jj(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+      E0 = E0 + mo_two_e_integrals_jj(i,j)
     enddo
   enddo
   do ii=1,elec_beta_num
     i = occ(ii,2)
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_one_e(i,i)
-    E0 = E0 + mo_bi_ortho_tc_one_e(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
       if (i==j) cycle
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
-      E0 = E0 + 0.5d0*mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
     enddo
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
     enddo
   enddo
 
   ! Virtual MOs
   do i=1,mo_num
     if (fock_diag_tmp(1,i) /= 0.d0) cycle
-    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_one_e(i,i)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
     enddo
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_bi_ortho_tc_two_e_jj(i,j)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
     enddo
   enddo
   do i=1,mo_num
     if (fock_diag_tmp(2,i) /= 0.d0) cycle
-    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_one_e(i,i)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
     do jj=1,elec_beta_num
       j = occ(jj,2)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj_anti(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
     enddo
     do jj=1,elec_alpha_num
       j = occ(jj,1)
-      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_bi_ortho_tc_two_e_jj(i,j)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
     enddo
   enddo
 

From 18d186228d9cc0afc1907fcbdd11774bdb41551e Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 27 Jan 2023 18:04:22 +0100
Subject: [PATCH 59/68] corrected cipsi_tc_bi_ortho/selection.irp.f with
 build_fock_tmp_tc

---
 src/cipsi_tc_bi_ortho/fock_diag.irp.f | 95 +++++++++++++++++++++++++++
 src/cipsi_tc_bi_ortho/selection.irp.f |  2 +-
 2 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 src/cipsi_tc_bi_ortho/fock_diag.irp.f

diff --git a/src/cipsi_tc_bi_ortho/fock_diag.irp.f b/src/cipsi_tc_bi_ortho/fock_diag.irp.f
new file mode 100644
index 00000000..af6849ab
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/fock_diag.irp.f
@@ -0,0 +1,95 @@
+subroutine build_fock_tmp_tc(fock_diag_tmp,det_ref,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! Build the diagonal of the Fock matrix corresponding to the generator determinant det_ref.
+! fock_diag_tmp(s,i), i<=mo_num, is the element for MO i and spin s (1=alpha, 2=beta); fock_diag_tmp(:,mo_num+1) holds $E_0$.
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: det_ref(Nint,2)
+  double precision, intent(out)  :: fock_diag_tmp(2,mo_num+1)
+  ! occ(:,1) / occ(:,2) and ne(1) / ne(2) are filled by bitstring_to_list_ab and used below as alpha / beta data
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: ne(2), i, j, ii, jj
+  double precision               :: E0
+  ! NOTE(review): despite the _tc suffix only mo_one_e_integrals and mo_two_e_integrals_jj(_anti) are used here -- confirm whether the mo_bi_ortho_tc_* integrals were intended
+  ! Decode det_ref into the lists of occupied MOs and the electron counts
+  call bitstring_to_list_ab(det_ref,occ,Ne,Nint)
+
+  fock_diag_tmp = 0.d0
+  E0 = 0.d0
+  ! Sanity check: det_ref must hold exactly elec_alpha_num alpha and elec_beta_num beta electrons
+  if (Ne(1) /= elec_alpha_num) then
+    print *,  'Error in build_fock_tmp_tc (alpha)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)
+    stop -1
+  endif
+  if (Ne(2) /= elec_beta_num) then
+    print *, 'Error in build_fock_tmp_tc (beta)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)
+    stop -1
+  endif
+
+  ! Occupied MOs: E0 gets the one-electron terms, half of each same-spin term (every pair is visited twice) and the alpha-beta terms once (alpha loop only)
+  do ii=1,elec_alpha_num
+    i = occ(ii,1)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      if (i==j) cycle
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+      E0 = E0 + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  do ii=1,elec_beta_num
+    i = occ(ii,2)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      if (i==j) cycle
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+
+  ! Virtual MOs: every entry still exactly zero after the occupied pass is filled here (E0 is not modified)
+  do i=1,mo_num
+    if (fock_diag_tmp(1,i) /= 0.d0) cycle
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  do i=1,mo_num
+    if (fock_diag_tmp(2,i) /= 0.d0) cycle
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  ! The extra column mo_num+1 stores the accumulated energy E0 for both spin rows
+  fock_diag_tmp(1,mo_num+1) = E0
+  fock_diag_tmp(2,mo_num+1) = E0
+
+end
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
index 659e50a8..8137b922 100644
--- a/src/cipsi_tc_bi_ortho/selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -19,7 +19,7 @@ subroutine select_connected(i_generator, E0, pt2_data, b, subset, csubset)
 
   allocate(fock_diag_tmp(2,mo_num+1))
 
-  call build_fock_tmp(fock_diag_tmp, psi_det_generators(1,1,i_generator), N_int)
+  call build_fock_tmp_tc(fock_diag_tmp, psi_det_generators(1,1,i_generator), N_int)
 
   do k = 1, N_int
       hole_mask(k,1)     = iand(generators_bitmask(k,1,s_hole), psi_det_generators(k,1,i_generator))

From 55750974cdec5e4cf7ec9f94de64c157255771cb Mon Sep 17 00:00:00 2001
From: eginer <giner.emmanuel@gmail.com>
Date: Fri, 27 Jan 2023 18:15:32 +0100
Subject: [PATCH 60/68] rm src/ao_one_e_ints/point_charges.irp.f

---
 external/qp2-dependencies             |   2 +-
 src/ao_one_e_ints/point_charges.irp.f | 272 --------------------------
 2 files changed, 1 insertion(+), 273 deletions(-)
 delete mode 100644 src/ao_one_e_ints/point_charges.irp.f

diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index 242151e0..90ee61f5 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit 242151e03d1d6bf042387226431d82d35845686a
+Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0
diff --git a/src/ao_one_e_ints/point_charges.irp.f b/src/ao_one_e_ints/point_charges.irp.f
deleted file mode 100644
index 82388c0d..00000000
--- a/src/ao_one_e_ints/point_charges.irp.f
+++ /dev/null
@@ -1,272 +0,0 @@
-
-! ---
-
-
-BEGIN_PROVIDER [ integer, n_pts_charge  ]
-  implicit none
-  BEGIN_DOC
-! Number of point charges to be added to the potential
-  END_DOC
-
-  logical                        :: has
-  PROVIDE ezfio_filename
-  if (mpi_master) then
-    
-    call ezfio_has_ao_one_e_ints_n_pts_charge(has)
-    if (has) then
-      write(6,'(A)') '.. >>>>> [ IO READ: n_pts_charge ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_n_pts_charge(n_pts_charge)
-    else
-      print *, 'ao_one_e_ints/n_pts_charge not found in EZFIO file'
-      stop 1
-    endif
-  endif
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST( n_pts_charge, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read n_pts_charge with MPI'
-    endif
-  IRP_ENDIF
-
-  call write_time(6)
-
-END_PROVIDER
-
-BEGIN_PROVIDER [ double precision, pts_charge_z, (n_pts_charge) ]
-
-  BEGIN_DOC
-  ! Charge associated to each point charge. 
-  END_DOC
-
-  implicit none
-  logical :: exists
-
-  PROVIDE ezfio_filename
-
-  if (mpi_master) then
-    call ezfio_has_ao_one_e_ints_pts_charge_z(exists)
-  endif
-
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read pts_charge_z with MPI'
-    endif
-  IRP_ENDIF
-
-  if (exists) then
-
-    if (mpi_master) then
-      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_z ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_pts_charge_z(pts_charge_z)
-      IRP_IF MPI
-        call MPI_BCAST(pts_charge_z, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-        if (ierr /= MPI_SUCCESS) then
-          stop 'Unable to read pts_charge_z with MPI'
-        endif
-      IRP_ENDIF
-    endif
-
-  else
- 
-    integer :: i
-    do i = 1, n_pts_charge
-      pts_charge_z(i) = 0.d0
-    enddo
-
-  endif
- print*,'Point charges '
- do i = 1, n_pts_charge
-  print*,'i,pts_charge_z(i)',i,pts_charge_z(i)
- enddo
-
-END_PROVIDER
-
-
-BEGIN_PROVIDER [ double precision, pts_charge_coord, (n_pts_charge,3) ]
-
-  BEGIN_DOC
-  ! Coordinates of each point charge. 
-  END_DOC
-
-  implicit none
-  logical :: exists
-
-  PROVIDE ezfio_filename
-
-  if (mpi_master) then
-    call ezfio_has_ao_one_e_ints_pts_charge_coord(exists)
-  endif
-
-  IRP_IF MPI_DEBUG
-    print *,  irp_here, mpi_rank
-    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-  IRP_ENDIF
-
-  IRP_IF MPI
-    include 'mpif.h'
-    integer :: ierr
-    call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-    if (ierr /= MPI_SUCCESS) then
-      stop 'Unable to read pts_charge_coord with MPI'
-    endif
-  IRP_ENDIF
-
-  if (exists) then
-
-    if (mpi_master) then
-     double precision, allocatable  :: buffer(:,:)
-     allocate (buffer(n_pts_charge,3))
-      write(6,'(A)') '.. >>>>> [ IO READ: pts_charge_coord ] <<<<< ..'
-      call ezfio_get_ao_one_e_ints_pts_charge_coord(buffer)
-      integer :: i,j
-      do i=1,3
-        do j=1,n_pts_charge
-          pts_charge_coord(j,i) = buffer(j,i)
-        enddo
-      enddo
-     deallocate(buffer)
-      IRP_IF MPI
-        call MPI_BCAST(pts_charge_coord, (n_pts_charge), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
-        if (ierr /= MPI_SUCCESS) then
-          stop 'Unable to read pts_charge_coord with MPI'
-        endif
-      IRP_ENDIF
-    endif
-
-  else
- 
-    do i = 1, n_pts_charge
-      pts_charge_coord(i,:) = 0.d0
-    enddo
-
-  endif
- print*,'Coordinates for the point charges '
- do i = 1, n_pts_charge
-  write(*,'(I3,X,3(F16.8,X))') i,pts_charge_coord(i,1:3)
- enddo
-
-END_PROVIDER
-
-! ---
-
-BEGIN_PROVIDER [ double precision, ao_integrals_pt_chrg, (ao_num,ao_num)]
-
-  BEGIN_DOC
-  !  Point charge-electron interaction, in the |AO| basis set.
-  !
-  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
-  !
-  !  These integrals also contain the pseudopotential integrals.
-  END_DOC
-
-  implicit none
-  integer          :: num_A, num_B, power_A(3), power_B(3)
-  integer          :: i, j, k, l, n_pt_in, m
-  double precision :: alpha, beta
-  double precision :: A_center(3),B_center(3),C_center(3)
-  double precision :: overlap_x,overlap_y,overlap_z,overlap,dx,NAI_pol_mult
-
-  ao_integrals_pt_chrg = 0.d0
-
-!  if (read_ao_integrals_pt_chrg) then
-!
-!    call ezfio_get_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
-!    print *,  'AO N-e integrals read from disk'
-!
-!  else
-
-!    if(use_cosgtos) then
-!      !print *, " use_cosgtos for ao_integrals_pt_chrg ?", use_cosgtos
-!
-!      do j = 1, ao_num
-!        do i = 1, ao_num
-!          ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg_cosgtos(i,j)
-!        enddo
-!      enddo
-!
-!    else
-
-      !$OMP PARALLEL                                                   &
-          !$OMP DEFAULT (NONE)                                         &
-          !$OMP PRIVATE (i,j,k,l,m,alpha,beta,A_center,B_center,C_center,power_A,power_B,&
-          !$OMP          num_A,num_B,Z,c,c1,n_pt_in)                      &
-          !$OMP SHARED (ao_num,ao_prim_num,ao_expo_ordered_transp,ao_power,ao_nucl,pts_charge_coord,ao_coef_normalized_ordered_transp,nucl_coord,&
-          !$OMP         n_pt_max_integrals,ao_integrals_pt_chrg,n_pts_charge,pts_charge_z)
-
-      n_pt_in = n_pt_max_integrals
-
-      !$OMP DO SCHEDULE (dynamic)
-
-      do j = 1, ao_num
-        num_A = ao_nucl(j)
-        power_A(1:3)= ao_power(j,1:3)
-        A_center(1:3) = nucl_coord(num_A,1:3)
-
-        do i = 1, ao_num
-
-          num_B = ao_nucl(i)
-          power_B(1:3)= ao_power(i,1:3)
-          B_center(1:3) = nucl_coord(num_B,1:3)
-
-          do l=1,ao_prim_num(j)
-            alpha = ao_expo_ordered_transp(l,j)
-
-            do m=1,ao_prim_num(i)
-              beta = ao_expo_ordered_transp(m,i)
-
-              double precision               :: c, c1
-              c = 0.d0
-
-              do  k = 1, n_pts_charge
-                double precision               :: Z
-                Z = pts_charge_z(k)
-
-                C_center(1:3) = pts_charge_coord(k,1:3)
-
-                c1 = NAI_pol_mult( A_center, B_center, power_A, power_B &
-                                 , alpha, beta, C_center, n_pt_in )
-
-                c = c + Z * c1
-
-              enddo
-              ao_integrals_pt_chrg(i,j) = ao_integrals_pt_chrg(i,j)  &
-                  + ao_coef_normalized_ordered_transp(l,j)             &
-                  * ao_coef_normalized_ordered_transp(m,i) * c
-            enddo
-          enddo
-        enddo
-      enddo
-
-    !$OMP END DO
-    !$OMP END PARALLEL
-
-!    endif
-
-
-!    IF(do_pseudo) THEN
-!       ao_integrals_pt_chrg += ao_pseudo_integrals
-!    ENDIF
-
-!  endif
-
-
-!  if (write_ao_integrals_pt_chrg) then
-!    call ezfio_set_ao_one_e_ints_ao_integrals_pt_chrg(ao_integrals_pt_chrg)
-!    print *,  'AO N-e integrals written to disk'
-!  endif
-
-END_PROVIDER

From b817f87fd4b3a0e0fe0855ebf04ce385ee7b1da1 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 31 Jan 2023 08:42:39 +0100
Subject: [PATCH 61/68] Fixes for ARM

---
 config/gfortran.cfg          |  4 +--
 config/gfortran_openblas.cfg | 62 ++++++++++++++++++++++++++++++++++++
 configure                    | 26 ++++++++++++---
 etc/ocaml.rc                 |  8 ++---
 external/qp2-dependencies    |  2 +-
 5 files changed, 90 insertions(+), 12 deletions(-)
 create mode 100644 config/gfortran_openblas.cfg

diff --git a/config/gfortran.cfg b/config/gfortran.cfg
index 56bb6ba4..33ce48ba 100644
--- a/config/gfortran.cfg
+++ b/config/gfortran.cfg
@@ -10,7 +10,7 @@
 #
 #
 [COMMON]
-FC           : gfortran -g -ffree-line-length-none -I . -fPIC
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
 LAPACK_LIB   : -lblas -llapack
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
@@ -22,7 +22,7 @@ IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
 # 0 : Deactivate
 #
 [OPTION]
-MODE    : DEBUG      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
 CACHE   : 0          ; Enable cache_compile.py
 OPENMP  : 1          ; Append OpenMP flags
 
diff --git a/config/gfortran_openblas.cfg b/config/gfortran_openblas.cfg
new file mode 100644
index 00000000..ab67d8c3
--- /dev/null
+++ b/config/gfortran_openblas.cfg
@@ -0,0 +1,62 @@
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lopenblas              : Link with the OpenBLAS library (see LAPACK_LIB below)
+# -I .                    : Include the current directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
+LAPACK_LIB   : -lopenblas
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0        ; Enable cache_compile.py
+OPENMP  : 1        ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -g -march=native  -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized  -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+
diff --git a/configure b/configure
index 852082e7..59135126 100755
--- a/configure
+++ b/configure
@@ -16,6 +16,25 @@ export CC=gcc
 git submodule init
 git submodule update
 
+# Update ARM or x86 dependencies
+ARCHITECTURE=$(uname -m)
+cd ${QP_ROOT}/external/qp2-dependencies
+echo "Architecture: $ARCHITECTURE"
+case $ARCHITECTURE in
+  aarch64)
+    git checkout arm64
+    ;;
+  x86_64) 
+    git checkout x86
+    ;;
+  *)
+    echo "Unknown architecture. Using x86_64."
+    git checkout x86
+    ;;
+esac
+cd ${QP_ROOT}
+
+
 function help()
 {
     cat <<EOF
@@ -235,10 +254,9 @@ EOF
          execute <<EOF
             source "${QP_ROOT}"/quantum_package.rc
             cd "${QP_ROOT}"/external/
-            tar --gunzip --extract --file qp2-dependencies/ocaml-bundle_x86.tar.gz
-            echo "" | ./ocaml-bundle/bootstrap.sh "${QP_ROOT}"
-            ./ocaml-bundle/configure.sh "${QP_ROOT}"
-            echo "" | ./ocaml-bundle/compile.sh "${QP_ROOT}"
+            tar --gunzip --extract --file qp2-dependencies/opampack.tar.gz
+            cd "${QP_ROOT}"/external/opampack
+            ./install.sh
 EOF
 
     elif [[ ${PACKAGE} = bse ]] ; then
diff --git a/etc/ocaml.rc b/etc/ocaml.rc
index da6de03f..76f38702 100644
--- a/etc/ocaml.rc
+++ b/etc/ocaml.rc
@@ -2,11 +2,9 @@
 
 if [[ -z $OPAMROOT ]]
 then
-
-  # Comment these lines if you have a system-wide OCaml installation
-  export PATH="${QP_ROOT}/external/ocaml-bundle/bootstrap/bin:$PATH"
-  if [[ -f "${QP_ROOT}/external/ocaml-bundle/bootstrap/bin/opam" ]] ; then
-    eval $(opam env --root "${QP_ROOT}/external/ocaml-bundle/opam" --set-root) 
+  export PATH="${QP_ROOT}/external/opampack/:$PATH"
+  if [[ -f "${QP_ROOT}/external/opampack/opam" ]] ; then
+    eval $("${QP_ROOT}/external/opampack/opam" env --root "${QP_ROOT}/external/opampack/opamroot" --set-root) 
   fi
 fi
 source ${OPAMROOT}/opam-init/init.sh > /dev/null 2> /dev/null || true
diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index 90ee61f5..57a1466d 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit 90ee61f5041c7c94a0c605625a264860292813a0
+Subproject commit 57a1466d8707a8b0b1dfef8eede8e4f770b349cf

From 0fbd66d3ec98fe27379b74c71cdb78c35e2e7ef9 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 31 Jan 2023 11:22:51 +0100
Subject: [PATCH 62/68] Fix missing argument in
 on_hrmt_diag_split_degen_s_inv_half

---
 src/non_hermit_dav/new_routines.irp.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
index 4dea5f66..8db044d3 100644
--- a/src/non_hermit_dav/new_routines.irp.f
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -377,7 +377,7 @@ subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_e
       print*,'New vectors not bi-orthonormals at ', accu_nd
       call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root)
       if(complex_root)then 
-       call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp) ! bi-orthonormalization using QR
+       call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
       else
        print*,'S^{-1/2} exists !!'
        call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 

From c1188c05ced017374e104c1b55d4d0ac11d0eb75 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 31 Jan 2023 11:30:45 +0100
Subject: [PATCH 63/68] Added ARM config file

---
 config/gfortran_armpl.cfg | 65 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 config/gfortran_armpl.cfg

diff --git a/config/gfortran_armpl.cfg b/config/gfortran_armpl.cfg
new file mode 100644
index 00000000..fb5ee1cc
--- /dev/null
+++ b/config/gfortran_armpl.cfg
@@ -0,0 +1,65 @@
+# Common flags
+##############
+# module load arm
+# module load gnu
+# module load acfl
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -larmpl_lp64            : Link with the Arm Performance Libraries (BLAS/LAPACK), see LAPACK_LIB below
+# -I .                    : Include the current directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -g -ffree-line-length-none -I . -fPIC -march=native
+LAPACK_LIB   : -larmpl_lp64
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32 --assert -DSET_NESTED
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT      ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 0        ; Enable cache_compile.py
+OPENMP  : 1        ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g
+FCFLAGS : -Ofast
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Enables all run-time checks (array bounds, pointers, recursion, etc...)
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -g -march=native  -fcheck=all -Waliasing -Wampersand -Wconversion -Wsurprising -Wintrinsics-std -Wno-tabs -Wintrinsic-shadow -Wline-truncation -Wreal-q-constant -Wuninitialized  -fbacktrace -ffpe-trap=zero,overflow,underflow -finit-real=nan
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp
+

From 1d4ec1ce93415f93a3f4d4201fcf560850554d88 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 14 Jun 2022 18:10:32 +0200
Subject: [PATCH 64/68] Update guess in CISD (see
 f093592124457451eeb0b6817488b4591cf9295f)

---
 external/qp2-dependencies                     |  2 +-
 src/cisd/cisd.irp.f                           | 37 ++++++++++++++++---
 .../diagonalization_hcsf_dressed.irp.f        | 19 +++-------
 src/determinants/determinants.irp.f           | 18 +++++----
 4 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/external/qp2-dependencies b/external/qp2-dependencies
index 57a1466d..242151e0 160000
--- a/external/qp2-dependencies
+++ b/external/qp2-dependencies
@@ -1 +1 @@
-Subproject commit 57a1466d8707a8b0b1dfef8eede8e4f770b349cf
+Subproject commit 242151e03d1d6bf042387226431d82d35845686a
diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index fca3b10e..86333852 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -47,6 +47,37 @@ program cisd
   PROVIDE N_states
   read_wf = .False.
   SOFT_TOUCH read_wf
+
+  integer :: i,k
+
+  if(pseudo_sym)then
+   call H_apply_cisd_sym
+  else
+   call H_apply_cisd
+  endif
+  double precision :: r1, r2
+  double precision, allocatable :: U_csf(:,:)
+
+  allocate(U_csf(N_csf,N_states))
+  U_csf = 0.d0
+  U_csf(1,1) = 1.d0
+  do k=2,N_states
+    do i=1,N_csf
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dacos(-1.d0)*2.d0*r2
+        U_csf(i,k) = r1*dcos(r2)
+    enddo
+    U_csf(k,k) = U_csf(k,k) +100.d0
+  enddo
+  do k=1,N_states
+    call normalize(U_csf(1,k),N_csf)
+  enddo
+  call convertWFfromCSFtoDET(N_states,U_csf(1,1),psi_coef(1,1))
+  deallocate(U_csf)
+  SOFT_TOUCH psi_coef
+
   call run
 end
 
@@ -56,13 +87,7 @@ subroutine run
   double precision               :: cisdq(N_states), delta_e
   double precision,external      :: diag_h_mat_elem
 
-  if(pseudo_sym)then
-   call H_apply_cisd_sym
-  else
-   call H_apply_cisd
-  endif
   psi_coef = ci_eigenvectors
-  SOFT_TOUCH psi_coef
   call save_wavefunction_truncated(save_threshold)
   call ezfio_set_cisd_energy(CI_energy)
 
diff --git a/src/davidson/diagonalization_hcsf_dressed.irp.f b/src/davidson/diagonalization_hcsf_dressed.irp.f
index f531fb3a..8ecc4d67 100644
--- a/src/davidson/diagonalization_hcsf_dressed.irp.f
+++ b/src/davidson/diagonalization_hcsf_dressed.irp.f
@@ -264,29 +264,20 @@ subroutine davidson_diag_csf_hjj(dets_in,u_in,H_jj,energies,dim_in,sze,sze_csf,N
   ! ===================
 
   converged = .False.
-
+  call convertWFfromDETtoCSF(N_st_diag,u_in(1,1),U_csf(1,1))
   do k=N_st+1,N_st_diag
-    do i=1,sze
+    do i=1,sze_csf
         call random_number(r1)
         call random_number(r2)
         r1 = dsqrt(-2.d0*dlog(r1))
         r2 = dtwo_pi*r2
-        u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st)
+        U_csf(i,k) = r1*dcos(r2) * u_csf(i,k-N_st)
     enddo
-    u_in(k,k) = u_in(k,k) + 10.d0
+    U_csf(k,k) = u_csf(k,k) + 10.d0
   enddo
   do k=1,N_st_diag
-    call normalize(u_in(1,k),sze)
+    call normalize(U_csf(1,k),sze_csf)
   enddo
-
-  do k=1,N_st_diag
-    do i=1,sze
-      U(i,k) = u_in(i,k)
-    enddo
-  enddo
-
-  ! Make random verctors eigenstates of S2
-  call convertWFfromDETtoCSF(N_st_diag,U(1,1),U_csf(1,1))
   call convertWFfromCSFtoDET(N_st_diag,U_csf(1,1),U(1,1))
 
   do while (.not.converged)
diff --git a/src/determinants/determinants.irp.f b/src/determinants/determinants.irp.f
index cb6c14a2..de819330 100644
--- a/src/determinants/determinants.irp.f
+++ b/src/determinants/determinants.irp.f
@@ -77,14 +77,18 @@ BEGIN_PROVIDER [ integer, psi_det_size ]
   END_DOC
   PROVIDE ezfio_filename
   logical                        :: exists
-  if (mpi_master) then
-    call ezfio_has_determinants_n_det(exists)
-    if (exists) then
-      call ezfio_get_determinants_n_det(psi_det_size)
-    else
-      psi_det_size = 1
+  psi_det_size = N_states
+  PROVIDE mpi_master
+  if (read_wf) then
+    if (mpi_master) then
+      call ezfio_has_determinants_n_det(exists)
+      if (exists) then
+        call ezfio_get_determinants_n_det(psi_det_size)
+      else
+        psi_det_size = N_states
+      endif
+      call write_int(6,psi_det_size,'Dimension of the psi arrays')
     endif
-    call write_int(6,psi_det_size,'Dimension of the psi arrays')
   endif
   IRP_IF MPI_DEBUG
     print *,  irp_here, mpi_rank

From f4b5cf7abf5476c517eacf8e9c2018d5e346e442 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Thu, 19 May 2022 10:28:05 +0200
Subject: [PATCH 65/68] update au to eV

---
 src/cisd/cisd.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index 86333852..abe66321 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -120,8 +120,8 @@ subroutine run
     print*,'******************************'
     print*,'Excitation energies (eV)    (CISD+Q)'
     do i = 2, N_states
-      print*, i ,(CI_energy(i) - CI_energy(1))/0.0367502d0, &
-        (cisdq(i) - cisdq(1)) / 0.0367502d0
+      print*, i ,(CI_energy(i) - CI_energy(1))*27.211396641308d0, &
+        (cisdq(i) - cisdq(1)) * 27.211396641308d0
     enddo
   endif
 

From a7ec66026f651483e1c5e3d324f2cb026e2a43ce Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Thu, 19 May 2022 10:42:29 +0200
Subject: [PATCH 66/68] provider for Ha to eV

---
 src/cisd/cisd.irp.f | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index abe66321..e8b5fddf 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -120,8 +120,8 @@ subroutine run
     print*,'******************************'
     print*,'Excitation energies (eV)    (CISD+Q)'
     do i = 2, N_states
-      print*, i ,(CI_energy(i) - CI_energy(1))*27.211396641308d0, &
-        (cisdq(i) - cisdq(1)) * 27.211396641308d0
+      print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, &
+        (cisdq(i) - cisdq(1)) * ha_to_ev
     enddo
   endif
 

From 70562d6336ce1e47a48cb6f62e646646dee8bf68 Mon Sep 17 00:00:00 2001
From: ydamour <yann.damour@hotmail.fr>
Date: Thu, 19 May 2022 11:12:39 +0200
Subject: [PATCH 67/68] cisd(q) only if n_elec >= 4

---
 src/cisd/cisd.irp.f | 59 ++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 20 deletions(-)

diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index e8b5fddf..3e1e8d97 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -94,7 +94,9 @@ subroutine run
   do i = 1,N_states
     k = maxloc(dabs(psi_coef_sorted(1:N_det,i)),dim=1)
     delta_E  = CI_electronic_energy(i) - diag_h_mat_elem(psi_det_sorted(1,1,k),N_int)
-    cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
+    if (elec_alpha_num + elec_beta_num >= 4) then
+      cisdq(i) = CI_energy(i) + delta_E * (1.d0 - psi_coef_sorted(k,i)**2)
+    endif
   enddo
   print *,  'N_det = ', N_det
   print*,''
@@ -103,26 +105,43 @@ subroutine run
   do i = 1,N_states
     print *,  i, CI_energy(i)
   enddo
-  print*,''
-  print*,'******************************'
-  print *,  'CISD+Q Energies'
-  do i = 1,N_states
-    print *,  i, cisdq(i)
-  enddo
+  if (elec_alpha_num + elec_beta_num >= 4) then
+    print*,''
+    print*,'******************************'
+    print *,  'CISD+Q Energies'
+    do i = 1,N_states
+      print *,  i, cisdq(i)
+    enddo
+  endif
   if (N_states > 1) then
-    print*,''
-    print*,'******************************'
-    print*,'Excitation energies (au)    (CISD+Q)'
-    do i = 2, N_states
-      print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1)
-    enddo
-    print*,''
-    print*,'******************************'
-    print*,'Excitation energies (eV)    (CISD+Q)'
-    do i = 2, N_states
-      print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, &
-        (cisdq(i) - cisdq(1)) * ha_to_ev
-    enddo
+    if (elec_alpha_num + elec_beta_num >= 4) then
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (au)    (CISD+Q)'
+      do i = 2, N_states
+        print*, i ,CI_energy(i) - CI_energy(1), cisdq(i) - cisdq(1)
+      enddo
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (eV)    (CISD+Q)'
+      do i = 2, N_states
+        print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev, &
+          (cisdq(i) - cisdq(1)) * ha_to_ev
+      enddo
+    else
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (au)    (CISD)'
+      do i = 2, N_states
+        print*, i ,CI_energy(i) - CI_energy(1)
+      enddo
+      print*,''
+      print*,'******************************'
+      print*,'Excitation energies (eV)    (CISD)'
+      do i = 2, N_states
+        print*, i ,(CI_energy(i) - CI_energy(1)) * ha_to_ev
+      enddo
+    endif
   endif
 
 end

From b26aa6280a6b6345b062c02caad594486af8b4d5 Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 31 Jan 2023 15:20:39 +0100
Subject: [PATCH 68/68] Revert CISD guess

---
 src/cisd/cisd.irp.f | 65 ++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/src/cisd/cisd.irp.f b/src/cisd/cisd.irp.f
index 3e1e8d97..5447afd2 100644
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@@ -47,36 +47,35 @@ program cisd
   PROVIDE N_states
   read_wf = .False.
   SOFT_TOUCH read_wf
-
-  integer :: i,k
-
-  if(pseudo_sym)then
-   call H_apply_cisd_sym
-  else
-   call H_apply_cisd
-  endif
-  double precision :: r1, r2
-  double precision, allocatable :: U_csf(:,:)
-
-  allocate(U_csf(N_csf,N_states))
-  U_csf = 0.d0
-  U_csf(1,1) = 1.d0
-  do k=2,N_states
-    do i=1,N_csf
-        call random_number(r1)
-        call random_number(r2)
-        r1 = dsqrt(-2.d0*dlog(r1))
-        r2 = dacos(-1.d0)*2.d0*r2
-        U_csf(i,k) = r1*dcos(r2)
-    enddo
-    U_csf(k,k) = U_csf(k,k) +100.d0
-  enddo
-  do k=1,N_states
-    call normalize(U_csf(1,k),N_csf)
-  enddo
-  call convertWFfromCSFtoDET(N_states,U_csf(1,1),psi_coef(1,1))
-  deallocate(U_csf)
-  SOFT_TOUCH psi_coef
+!
+!  integer :: i,k
+!
+!  if(pseudo_sym)then
+!   call H_apply_cisd_sym
+!  else
+!   call H_apply_cisd
+!  endif
+!  double precision :: r1, r2
+!  double precision, allocatable :: U_csf(:,:)
+!
+!  allocate(U_csf(N_csf,N_states))
+!  U_csf = 0.d0
+!  do k=1,N_states
+!    do i=1,N_csf
+!        call random_number(r1)
+!        call random_number(r2)
+!        r1 = dsqrt(-2.d0*dlog(r1))
+!        r2 = dacos(-1.d0)*2.d0*r2
+!        U_csf(i,k) = r1*dcos(r2)
+!    enddo
+!    U_csf(k,k) = U_csf(k,k) +10000.d0
+!  enddo
+!  do k=1,N_states
+!    call normalize(U_csf(1,k),N_csf)
+!  enddo
+!  call convertWFfromCSFtoDET(N_states,U_csf(1,1),psi_coef(1,1))
+!  deallocate(U_csf)
+!  SOFT_TOUCH psi_coef
 
   call run
 end
@@ -87,7 +86,13 @@ subroutine run
   double precision               :: cisdq(N_states), delta_e
   double precision,external      :: diag_h_mat_elem
 
+  if(pseudo_sym)then
+   call H_apply_cisd_sym
+  else
+   call H_apply_cisd
+  endif
   psi_coef = ci_eigenvectors
+  SOFT_TOUCH psi_coef
   call save_wavefunction_truncated(save_threshold)
   call ezfio_set_cisd_energy(CI_energy)