diff --git a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
index fe25e9c0..f6df2dbd 100644
--- a/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_erf_gauss.irp.f
@@ -105,7 +105,7 @@ end function phi_j_erf_mu_r_phi
 subroutine erfc_mu_gauss_xyz_ij_ao(i, j, mu, C_center, delta, gauss_ints)
  implicit none
  BEGIN_DOC
-  ! gauss_ints(m) =   \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-r'|))/ |r-r'| * AO_i(r') * AO_j(r')
+  ! gauss_ints(m) =   \int dr exp(-delta (r - C)^2 ) x/y/z * ( 1 - erf(mu |r-C|))/ |r-C| * AO_i(r) * AO_j(r)
   !
   ! with m = 1 ==> x, m = 2, m = 3 ==> z
   !
diff --git a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
index c058d0d8..fde4daa7 100644
--- a/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
+++ b/src/ao_many_one_e_ints/ao_gaus_gauss.irp.f
@@ -170,7 +170,7 @@ double precision function overlap_gauss_r12_ao_with1s(B_center, beta, D_center,
 
   double precision, external   :: overlap_gauss_r12, overlap_gauss_r12_ao
 
-  ASSERT(beta .gt. 0.d0)
+  ASSERT(beta .ge. 0.d0)
   if(beta .lt. 1d-10) then
     overlap_gauss_r12_ao_with1s = overlap_gauss_r12_ao(D_center, delta, i, j)
     return
diff --git a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
deleted file mode 100644
index 50b4bf96..00000000
--- a/src/ao_many_one_e_ints/grad2_jmu_modif.irp.f
+++ /dev/null
@@ -1,363 +0,0 @@
-
-! ---
-
-BEGIN_PROVIDER [ double precision, int2_grad1u2_grad2u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! -\frac{1}{4} x int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [1 - erf(mu r12)]^2
-  !
-  END_DOC
-
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
-  double precision              :: coef, beta, B_center(3)
-  double precision              :: tmp
-  double precision              :: wall0, wall1
-
-  double precision, external    :: overlap_gauss_r12_ao_with1s
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  int2_grad1u2_grad2u2_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_1_erf_x_2, coef_gauss_1_erf_x_2,         &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_grad1u2_grad2u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
-
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_1_erf_x_2(i_fit)
-            coef_fit = coef_gauss_1_erf_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-
-            tmp += -0.25d0 * coef * coef_fit * int_fit
-          enddo
-        enddo
-
-        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_grad1u2_grad2u2_j1b2(j,i,ipoint) = int2_grad1u2_grad2u2_j1b2(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_grad1u2_grad2u2_j1b2', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, int2_u2_j1b2, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 [u_12^mu]^2
-  !
-  END_DOC
-
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit
-  double precision              :: coef, beta, B_center(3), tmp
-  double precision              :: wall0, wall1
-
-  double precision, external    :: overlap_gauss_r12_ao_with1s
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  int2_u2_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_x_2, coef_gauss_j_mu_x_2,           &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u2_j1b2)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
-
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_j_mu_x_2(i_fit)
-            coef_fit = coef_gauss_j_mu_x_2(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-
-            tmp += coef * coef_fit * int_fit
-          enddo
-        enddo
-
-        int2_u2_j1b2(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u2_j1b2(j,i,ipoint) = int2_u2_j1b2(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u2_j1b2', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_x_j1b2, (3, ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu] r2
-  !
-  END_DOC
-
-  implicit none
-  integer          :: i, j, ipoint, i_1s, i_fit
-  double precision :: r(3), int_fit(3), expo_fit, coef_fit
-  double precision :: coef, beta, B_center(3), dist
-  double precision :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, coef_tmp
-  double precision :: tmp_x, tmp_y, tmp_z
-  double precision :: wall0, wall1
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  int2_u_grad1u_x_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, alpha_1s, dist,        &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp,     & 
- !$OMP          tmp_x, tmp_y, tmp_z)                                & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_x_j1b2)
- !$OMP DO
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp_x = 0.d0
-        tmp_y = 0.d0
-        tmp_z = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
-
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3)) 
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
-
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
-
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
-
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist 
-            !if(expo_coef_1s .gt. 80.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
-            call NAI_pol_x_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s, 1.d+9, r, int_fit)
-
-            tmp_x += coef_tmp * int_fit(1)
-            tmp_y += coef_tmp * int_fit(2)
-            tmp_z += coef_tmp * int_fit(3)
-          enddo
-        enddo
-
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = tmp_x
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = tmp_y
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = tmp_z
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u_grad1u_x_j1b2(1,j,i,ipoint) = int2_u_grad1u_x_j1b2(1,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(2,j,i,ipoint) = int2_u_grad1u_x_j1b2(2,i,j,ipoint)
-        int2_u_grad1u_x_j1b2(3,j,i,ipoint) = int2_u_grad1u_x_j1b2(3,i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_x_j1b2', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, int2_u_grad1u_j1b2, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2)^2 u_12^mu [\grad_1 u_12^mu]
-  !
-  END_DOC
-
-  implicit none
-  integer                       :: i, j, ipoint, i_1s, i_fit
-  double precision              :: r(3), int_fit, expo_fit, coef_fit, coef_tmp
-  double precision              :: coef, beta, B_center(3), dist
-  double precision              :: alpha_1s, alpha_1s_inv, centr_1s(3), expo_coef_1s, tmp
-  double precision              :: wall0, wall1
-  double precision, external    :: NAI_pol_mult_erf_ao_with1s
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  int2_u_grad1u_j1b2 = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp, alpha_1s, dist,   &
- !$OMP          alpha_1s_inv, centr_1s, expo_coef_1s, coef_tmp)     & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b3_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_1_erf, coef_gauss_j_mu_1_erf,       &
- !$OMP          List_all_comb_b3_coef, List_all_comb_b3_expo,       & 
- !$OMP          List_all_comb_b3_cent, int2_u_grad1u_j1b2)
- !$OMP DO
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = i, ao_num
-        r(1) = final_grid_points(1,ipoint)
-        r(2) = final_grid_points(2,ipoint)
-        r(3) = final_grid_points(3,ipoint)
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b3_size
-
-          coef        = List_all_comb_b3_coef  (i_1s)
-          beta        = List_all_comb_b3_expo  (i_1s)
-          B_center(1) = List_all_comb_b3_cent(1,i_1s)
-          B_center(2) = List_all_comb_b3_cent(2,i_1s)
-          B_center(3) = List_all_comb_b3_cent(3,i_1s)
-          dist        = (B_center(1) - r(1)) * (B_center(1) - r(1)) &
-                      + (B_center(2) - r(2)) * (B_center(2) - r(2)) &
-                      + (B_center(3) - r(3)) * (B_center(3) - r(3))
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_j_mu_1_erf(i_fit)
-            coef_fit = coef_gauss_j_mu_1_erf(i_fit)
-
-            alpha_1s     = beta + expo_fit
-            alpha_1s_inv = 1.d0 / alpha_1s 
-            centr_1s(1)  = alpha_1s_inv * (beta * B_center(1) + expo_fit * r(1))
-            centr_1s(2)  = alpha_1s_inv * (beta * B_center(2) + expo_fit * r(2))
-            centr_1s(3)  = alpha_1s_inv * (beta * B_center(3) + expo_fit * r(3))
-
-            expo_coef_1s = beta * expo_fit * alpha_1s_inv * dist
-            !if(expo_coef_1s .gt. 80.d0) cycle
-            coef_tmp = coef * coef_fit * dexp(-expo_coef_1s)
-            !if(dabs(coef_tmp) .lt. 1d-10) cycle
-            
-            int_fit = NAI_pol_mult_erf_ao_with1s(i, j, alpha_1s, centr_1s,  1.d+9, r)
-
-            tmp += coef_tmp * int_fit
-          enddo
-        enddo
-
-        int2_u_grad1u_j1b2(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        int2_u_grad1u_j1b2(j,i,ipoint) = int2_u_grad1u_j1b2(i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for int2_u_grad1u_j1b2', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
diff --git a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f b/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
deleted file mode 100644
index 552e7069..00000000
--- a/src/ao_many_one_e_ints/grad_lapl_jmu_modif.irp.f
+++ /dev/null
@@ -1,264 +0,0 @@
-
-! ---
-
-BEGIN_PROVIDER [ double precision, v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R| - 1) / |r - R|
-  !
-  END_DOC
-
-  implicit none
-  integer                    :: i, j, ipoint, i_1s
-  double precision           :: r(3), int_mu, int_coulomb
-  double precision           :: coef, beta, B_center(3)
-  double precision           :: tmp
-  double precision           :: wall0, wall1
-  double precision, external :: NAI_pol_mult_erf_ao_with1s
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  v_ij_erf_rk_cst_mu_j1b = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                                         &
- !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, int_mu, int_coulomb, tmp) & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points, &
- !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,   &
- !$OMP          v_ij_erf_rk_cst_mu_j1b, mu_erf)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b2_size
-
-          coef        = List_all_comb_b2_coef  (i_1s)
-          beta        = List_all_comb_b2_expo  (i_1s)
-          B_center(1) = List_all_comb_b2_cent(1,i_1s)
-          B_center(2) = List_all_comb_b2_cent(2,i_1s)
-          B_center(3) = List_all_comb_b2_cent(3,i_1s)
-
-          int_mu      = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r)
-          int_coulomb = NAI_pol_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r)
-
-          tmp += coef * (int_mu - int_coulomb)
-        enddo
-
-        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        v_ij_erf_rk_cst_mu_j1b(j,i,ipoint) = v_ij_erf_rk_cst_mu_j1b(i,j,ipoint)
-      enddo
-    enddo
-  enddo
- 
-  call wall_time(wall1)
-  print*, ' wall time for v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid, 3)]
-
-  BEGIN_DOC
-  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
-  END_DOC
-
-  implicit none
-  integer          :: i, j, ipoint
-  double precision :: wall0, wall1
-
-  call wall_time(wall0)
-
-  do ipoint = 1, n_points_final_grid
-    do i = 1, ao_num
-      do j = 1, ao_num
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,1) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,2) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint)
-        x_v_ij_erf_rk_cst_mu_j1b(j,i,ipoint,3) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-BEGIN_PROVIDER [ double precision, x_v_ij_erf_rk_cst_mu_tmp_j1b, (3, ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  ! int dr x phi_i(r) phi_j(r) 1s_j1b(r) (erf(mu(R) |r - R|) - 1)/|r - R|
-  END_DOC
-
-  implicit none
-  integer          :: i, j, ipoint, i_1s
-  double precision :: coef, beta, B_center(3), r(3), ints(3), ints_coulomb(3)
-  double precision :: tmp_x, tmp_y, tmp_z
-  double precision :: wall0, wall1
-
-  call wall_time(wall0)
-
-  x_v_ij_erf_rk_cst_mu_tmp_j1b = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                                        &
- !$OMP PRIVATE (ipoint, i, j, i_1s, r, coef, beta, B_center, ints, ints_coulomb,      & 
- !$OMP          tmp_x, tmp_y, tmp_z)                                                  & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, final_grid_points,&
- !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo, List_all_comb_b2_cent,  &
- !$OMP          x_v_ij_erf_rk_cst_mu_tmp_j1b, mu_erf)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp_x = 0.d0
-        tmp_y = 0.d0
-        tmp_z = 0.d0
-        do i_1s = 1, List_all_comb_b2_size
-
-          coef        = List_all_comb_b2_coef  (i_1s)
-          beta        = List_all_comb_b2_expo  (i_1s)
-          B_center(1) = List_all_comb_b2_cent(1,i_1s)
-          B_center(2) = List_all_comb_b2_cent(2,i_1s)
-          B_center(3) = List_all_comb_b2_cent(3,i_1s)
-
-          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center, mu_erf, r, ints        )
-          call NAI_pol_x_mult_erf_ao_with1s(i, j, beta, B_center,  1.d+9, r, ints_coulomb)
-
-          tmp_x += coef * (ints(1) - ints_coulomb(1))
-          tmp_y += coef * (ints(2) - ints_coulomb(2))
-          tmp_z += coef * (ints(3) - ints_coulomb(3))
-        enddo
-
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = tmp_x
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = tmp_y
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = tmp_z
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(1,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(1,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(2,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(2,i,j,ipoint)
-        x_v_ij_erf_rk_cst_mu_tmp_j1b(3,j,i,ipoint) = x_v_ij_erf_rk_cst_mu_tmp_j1b(3,i,j,ipoint)
-      enddo
-    enddo
-  enddo
-
-  call wall_time(wall1)
-  print*, ' wall time for x_v_ij_erf_rk_cst_mu_tmp_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
-! TODO analytically
-BEGIN_PROVIDER [ double precision, v_ij_u_cst_mu_j1b, (ao_num, ao_num, n_points_final_grid)]
-
-  BEGIN_DOC
-  !
-  ! int dr2 phi_i(r2) phi_j(r2) 1s_j1b(r2) u(mu, r12)
-  !
-  END_DOC
-
-  implicit none
-  integer                    :: i, j, ipoint, i_1s, i_fit
-  double precision           :: r(3), int_fit, expo_fit, coef_fit
-  double precision           :: coef, beta, B_center(3)
-  double precision           :: tmp
-  double precision           :: wall0, wall1
-
-  double precision, external :: overlap_gauss_r12_ao_with1s
-
-  provide mu_erf final_grid_points j1b_pen
-  call wall_time(wall0)
-
-  v_ij_u_cst_mu_j1b = 0.d0
-
- !$OMP PARALLEL DEFAULT (NONE)                                      &
- !$OMP PRIVATE (ipoint, i, j, i_1s, i_fit, r, coef, beta, B_center, &
- !$OMP          coef_fit, expo_fit, int_fit, tmp)                   & 
- !$OMP SHARED  (n_points_final_grid, ao_num, List_all_comb_b2_size, & 
- !$OMP          final_grid_points, n_max_fit_slat,                  &
- !$OMP          expo_gauss_j_mu_x, coef_gauss_j_mu_x,               &
- !$OMP          List_all_comb_b2_coef, List_all_comb_b2_expo,       & 
- !$OMP          List_all_comb_b2_cent, v_ij_u_cst_mu_j1b)
- !$OMP DO
-  !do ipoint = 1, 10
-  do ipoint = 1, n_points_final_grid
-    r(1) = final_grid_points(1,ipoint)
-    r(2) = final_grid_points(2,ipoint)
-    r(3) = final_grid_points(3,ipoint)
-
-    do i = 1, ao_num
-      do j = i, ao_num
-
-        tmp = 0.d0
-        do i_1s = 1, List_all_comb_b2_size
-
-          coef        = List_all_comb_b2_coef  (i_1s)
-          beta        = List_all_comb_b2_expo  (i_1s)
-          B_center(1) = List_all_comb_b2_cent(1,i_1s)
-          B_center(2) = List_all_comb_b2_cent(2,i_1s)
-          B_center(3) = List_all_comb_b2_cent(3,i_1s)
-
-          do i_fit = 1, n_max_fit_slat
-
-            expo_fit = expo_gauss_j_mu_x(i_fit)
-            coef_fit = coef_gauss_j_mu_x(i_fit)
-            int_fit  = overlap_gauss_r12_ao_with1s(B_center, beta, r, expo_fit, i, j)
-
-            tmp += coef * coef_fit * int_fit
-          enddo
-        enddo
-
-        v_ij_u_cst_mu_j1b(j,i,ipoint) = tmp
-      enddo
-    enddo
-  enddo
- !$OMP END DO
- !$OMP END PARALLEL
-
-  do ipoint = 1, n_points_final_grid
-    do i = 2, ao_num
-      do j = 1, i-1
-        v_ij_u_cst_mu_j1b(j,i,ipoint) = v_ij_u_cst_mu_j1b(i,j,ipoint)
-      enddo
-    enddo
-  enddo
- 
-  call wall_time(wall1)
-  print*, ' wall time for v_ij_u_cst_mu_j1b', wall1 - wall0
-
-END_PROVIDER 
-
-! ---
-
diff --git a/src/ao_many_one_e_ints/listj1b.irp.f b/src/ao_many_one_e_ints/listj1b.irp.f
deleted file mode 100644
index ff9f8ae5..00000000
--- a/src/ao_many_one_e_ints/listj1b.irp.f
+++ /dev/null
@@ -1,227 +0,0 @@
-
-! ---
-
-BEGIN_PROVIDER [ integer, List_all_comb_b2_size]
-
-  implicit none
-
-  List_all_comb_b2_size = 2**nucl_num
-
-END_PROVIDER
-
-! ---
-
-BEGIN_PROVIDER [ integer, List_all_comb_b2, (nucl_num, List_all_comb_b2_size)]
-
-  implicit none
-  integer :: i, j
-
-  if(nucl_num .gt. 32) then
-    print *, ' nucl_num = ', nucl_num, '> 32'
-    stop
-  endif
-
-  List_all_comb_b2 = 0
-
-  do i = 0, List_all_comb_b2_size-1
-    do j = 0, nucl_num-1
-      if (btest(i,j)) then
-        List_all_comb_b2(j+1,i+1) = 1
-      endif
-    enddo
-  enddo
-
-END_PROVIDER
-
-! ---
-
- BEGIN_PROVIDER [ double precision, List_all_comb_b2_coef, (   List_all_comb_b2_size)]
-&BEGIN_PROVIDER [ double precision, List_all_comb_b2_expo, (   List_all_comb_b2_size)]
-&BEGIN_PROVIDER [ double precision, List_all_comb_b2_cent, (3, List_all_comb_b2_size)]
-
-  implicit none
-  integer          :: i, j, k, phase
-  double precision :: tmp_alphaj, tmp_alphak
-  double precision :: tmp_cent_x, tmp_cent_y, tmp_cent_z
-
-  provide j1b_pen
-
-  List_all_comb_b2_coef = 0.d0
-  List_all_comb_b2_expo = 0.d0
-  List_all_comb_b2_cent = 0.d0
-
-  do i = 1, List_all_comb_b2_size
-
-    tmp_cent_x = 0.d0
-    tmp_cent_y = 0.d0
-    tmp_cent_z = 0.d0
-    do j = 1, nucl_num
-      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
-      List_all_comb_b2_expo(i) += tmp_alphaj
-      tmp_cent_x               += tmp_alphaj * nucl_coord(j,1)
-      tmp_cent_y               += tmp_alphaj * nucl_coord(j,2)
-      tmp_cent_z               += tmp_alphaj * nucl_coord(j,3)
-    enddo
-
-    ASSERT(List_all_comb_b2_expo(i) .gt. 0d0)
-    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
-
-    List_all_comb_b2_cent(1,i) = tmp_cent_x / List_all_comb_b2_expo(i) 
-    List_all_comb_b2_cent(2,i) = tmp_cent_y / List_all_comb_b2_expo(i)
-    List_all_comb_b2_cent(3,i) = tmp_cent_z / List_all_comb_b2_expo(i)
-  enddo
-
-  ! ---
-
-  do i = 1, List_all_comb_b2_size
-
-    do j = 2, nucl_num, 1
-      tmp_alphaj = dble(List_all_comb_b2(j,i)) * j1b_pen(j)
-      do k = 1, j-1, 1
-        tmp_alphak = dble(List_all_comb_b2(k,i)) * j1b_pen(k)
-
-        List_all_comb_b2_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
-                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
-                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
-      enddo
-    enddo
-
-    if(List_all_comb_b2_expo(i) .lt. 1d-10) cycle
-
-    List_all_comb_b2_coef(i) = List_all_comb_b2_coef(i) / List_all_comb_b2_expo(i)
-  enddo
-
-  ! ---
-
-  do i = 1, List_all_comb_b2_size
-
-    phase = 0
-    do j = 1, nucl_num
-      phase += List_all_comb_b2(j,i)
-    enddo
-
-    List_all_comb_b2_coef(i) = (-1.d0)**dble(phase) * dexp(-List_all_comb_b2_coef(i))
-  enddo
-
-END_PROVIDER
-
-! ---
-
-BEGIN_PROVIDER [ integer, List_all_comb_b3_size]
-
-  implicit none
-
-  List_all_comb_b3_size = 3**nucl_num
-
-END_PROVIDER
-
-! ---
-
-BEGIN_PROVIDER [ integer, List_all_comb_b3, (nucl_num, List_all_comb_b3_size)]
-
-  implicit none
-  integer              :: i, j, ii, jj
-  integer, allocatable :: M(:,:), p(:)
-
-  if(nucl_num .gt. 32) then
-    print *, ' nucl_num = ', nucl_num, '> 32'
-    stop
-  endif
-
-  List_all_comb_b3(:,:)                     = 0
-  List_all_comb_b3(:,List_all_comb_b3_size) = 2
-
-  allocate(p(nucl_num))
-  p = 0
-
-  do i = 2, List_all_comb_b3_size-1
-    do j = 1, nucl_num
-
-      ii = 0
-      do jj = 1, j-1, 1
-        ii = ii + p(jj) * 3**(jj-1)
-      enddo
-      p(j) = modulo(i-1-ii, 3**j) / 3**(j-1)
-
-      List_all_comb_b3(j,i) = p(j)
-    enddo
-  enddo
-
-END_PROVIDER
-
-! ---
-
- BEGIN_PROVIDER [ double precision, List_all_comb_b3_coef, (   List_all_comb_b3_size)]
-&BEGIN_PROVIDER [ double precision, List_all_comb_b3_expo, (   List_all_comb_b3_size)]
-&BEGIN_PROVIDER [ double precision, List_all_comb_b3_cent, (3, List_all_comb_b3_size)]
-
-  implicit none
-  integer          :: i, j, k, phase
-  double precision :: tmp_alphaj, tmp_alphak, facto
-
-  provide j1b_pen
-
-  List_all_comb_b3_coef = 0.d0
-  List_all_comb_b3_expo = 0.d0
-  List_all_comb_b3_cent = 0.d0
-
-  do i = 1, List_all_comb_b3_size
-
-    do j = 1, nucl_num
-      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-
-      List_all_comb_b3_expo(i)   += tmp_alphaj
-      List_all_comb_b3_cent(1,i) += tmp_alphaj * nucl_coord(j,1)
-      List_all_comb_b3_cent(2,i) += tmp_alphaj * nucl_coord(j,2)
-      List_all_comb_b3_cent(3,i) += tmp_alphaj * nucl_coord(j,3)
-
-    enddo
-
-    ASSERT(List_all_comb_b3_expo(i) .gt. 0d0)
-    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
-
-    List_all_comb_b3_cent(1,i) = List_all_comb_b3_cent(1,i) / List_all_comb_b3_expo(i) 
-    List_all_comb_b3_cent(2,i) = List_all_comb_b3_cent(2,i) / List_all_comb_b3_expo(i)
-    List_all_comb_b3_cent(3,i) = List_all_comb_b3_cent(3,i) / List_all_comb_b3_expo(i)
-  enddo
-
-  ! ---
-
-  do i = 1, List_all_comb_b3_size
-
-    do j = 2, nucl_num, 1
-      tmp_alphaj = dble(List_all_comb_b3(j,i)) * j1b_pen(j)
-      do k = 1, j-1, 1
-        tmp_alphak = dble(List_all_comb_b3(k,i)) * j1b_pen(k)
-
-        List_all_comb_b3_coef(i) += tmp_alphaj * tmp_alphak * ( (nucl_coord(j,1) - nucl_coord(k,1)) * (nucl_coord(j,1) - nucl_coord(k,1)) &
-                                                              + (nucl_coord(j,2) - nucl_coord(k,2)) * (nucl_coord(j,2) - nucl_coord(k,2)) &
-                                                              + (nucl_coord(j,3) - nucl_coord(k,3)) * (nucl_coord(j,3) - nucl_coord(k,3)) )
-      enddo
-    enddo
-
-    if(List_all_comb_b3_expo(i) .lt. 1d-10) cycle
-
-    List_all_comb_b3_coef(i) = List_all_comb_b3_coef(i) / List_all_comb_b3_expo(i)
-  enddo
-
-  ! ---
-
-  do i = 1, List_all_comb_b3_size
-
-    facto = 1.d0
-    phase = 0
-    do j = 1, nucl_num
-      tmp_alphaj = dble(List_all_comb_b3(j,i)) 
-
-      facto *= 2.d0 / (gamma(tmp_alphaj+1.d0) * gamma(3.d0-tmp_alphaj))
-      phase += List_all_comb_b3(j,i)
-    enddo
-
-    List_all_comb_b3_coef(i) = (-1.d0)**dble(phase) * facto * dexp(-List_all_comb_b3_coef(i))
-  enddo
-
-END_PROVIDER
-
-! ---
-
diff --git a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
index 641d25fe..4c2b65a6 100644
--- a/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
+++ b/src/ao_many_one_e_ints/prim_int_erf_gauss.irp.f
@@ -142,7 +142,7 @@ double precision function erf_mu_gauss(D_center,delta,mu,A_center,B_center,power
   !
   ! .. math::
   ! 
-  !   \int dr exp(-delta (r - D)^2 ) erf(mu*|r-r'|)/ |r-r'| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
+  !   \int dr exp(-delta (r - D)^2 ) erf(mu*|r-D|)/ |r-D| * (x-A_x)^a (x-B_x)^b \exp(-\alpha (x-A_x)^2 - \beta (x-B_x)^2 )
   !
   END_DOC
 
diff --git a/src/ao_tc_eff_map/EZFIO.cfg b/src/ao_tc_eff_map/EZFIO.cfg
deleted file mode 100644
index 1df2a130..00000000
--- a/src/ao_tc_eff_map/EZFIO.cfg
+++ /dev/null
@@ -1,18 +0,0 @@
-
-[j1b_pen]
-type: double precision
-doc: exponents of the 1-body Jastrow
-interface: ezfio
-size: (nuclei.nucl_num)
-
-[j1b_coeff]
-type: double precision
-doc: coeff of the 1-body Jastrow
-interface: ezfio
-size: (nuclei.nucl_num)
-
-[j1b_type]
-type: integer
-doc: type of 1-body Jastrow
-interface: ezfio, provider, ocaml
-default: 0
diff --git a/src/ao_tc_eff_map/NEED b/src/ao_tc_eff_map/NEED
index 38638c7c..d9edb325 100644
--- a/src/ao_tc_eff_map/NEED
+++ b/src/ao_tc_eff_map/NEED
@@ -2,3 +2,4 @@ ao_two_e_erf_ints
 mo_one_e_ints
 ao_many_one_e_ints
 dft_utils_in_r
+tc_keywords
diff --git a/src/cipsi_tc_bi_ortho/EZFIO.cfg b/src/cipsi_tc_bi_ortho/EZFIO.cfg
new file mode 100644
index 00000000..7fcf19eb
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/EZFIO.cfg
@@ -0,0 +1,36 @@
+[save_wf_after_selection]
+type: logical
+doc: If true, saves the wave function after the selection, before the diagonalization
+interface: ezfio,provider,ocaml
+default: False
+
+[seniority_max]
+type: integer
+doc: Maximum number of allowed open shells. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_ref]
+type: integer
+doc: 1: Hartree-Fock determinant, 2:All determinants of the dominant configuration
+interface: ezfio,ocaml,provider
+default: 1
+
+[excitation_max]
+type: integer
+doc: Maximum number of excitation with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_alpha_max]
+type: integer
+doc: Maximum number of excitation for alpha determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_beta_max]
+type: integer
+doc: Maximum number of excitation for beta determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
diff --git a/src/cipsi_tc_bi_ortho/NEED b/src/cipsi_tc_bi_ortho/NEED
new file mode 100644
index 00000000..4dd1af36
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/NEED
@@ -0,0 +1,6 @@
+mpi
+perturbation
+zmq
+iterations_tc
+csf
+tc_bi_ortho
diff --git a/src/cipsi_tc_bi_ortho/cipsi.irp.f b/src/cipsi_tc_bi_ortho/cipsi.irp.f
new file mode 100644
index 00000000..b1941068
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f
@@ -0,0 +1,136 @@
+subroutine run_cipsi
+
+  BEGIN_DOC
+  ! Selected Full Configuration Interaction with deterministic selection and
+  ! stochastic PT2.
+  END_DOC
+
+  use selection_types
+
+  implicit none
+
+  integer                        :: i,j,k,ndet
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  logical, external :: qp_stop
+
+  double precision :: threshold_generators_save
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+
+  PROVIDE H_apply_buffer_allocated distributed_davidson
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has, print_pt2
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2  = -huge(1.e0)
+  pt2_data % rpt2 = -huge(1.e0)
+  pt2_data % overlap(:,:) = 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+  call ezfio_has_hartree_fock_energy(has)
+  if (has) then
+    call ezfio_get_hartree_fock_energy(hf_energy_ref)
+  else
+    hf_energy_ref = ref_bitmask_energy
+  endif
+
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+  endif
+
+  correlation_energy_ratio = 0.d0
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function 
+    if (do_pt2) then
+      call pt2_dealloc(pt2_data)
+      call pt2_dealloc(pt2_data_err)
+      call pt2_alloc(pt2_data, N_states)
+      call pt2_alloc(pt2_data_err, N_states)
+      threshold_generators_save = threshold_generators
+      threshold_generators = 1.d0
+      SOFT_TOUCH threshold_generators
+      call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+      threshold_generators = threshold_generators_save
+      SOFT_TOUCH threshold_generators
+    else
+      call pt2_dealloc(pt2_data)
+      call pt2_alloc(pt2_data, N_states)
+      call ZMQ_selection(to_select, pt2_data)
+    endif
+
+    N_iter += 1
+
+    if (qp_stop()) exit
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf()
+
+    if (save_wf_after_selection) then
+      call save_wavefunction
+    endif
+
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+    if (qp_stop()) exit
+  enddo
+
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+end
diff --git a/src/cipsi_tc_bi_ortho/energy.irp.f b/src/cipsi_tc_bi_ortho/energy.irp.f
new file mode 100644
index 00000000..16f4528e
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/energy.irp.f
@@ -0,0 +1,51 @@
+BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ]
+ implicit none
+ BEGIN_DOC
+ ! If true, initialize pt2_E0_denominator
+ END_DOC
+ initialize_pt2_E0_denominator = .True.
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! E0 in the denominator of the PT2
+ END_DOC
+ integer :: i,j
+
+  pt2_E0_denominator = eigval_right_tc_bi_orth
+
+! if (initialize_pt2_E0_denominator) then
+!   if (h0_type == "EN") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else if (h0_type == "HF") then
+!     do i=1,N_states
+!       j = maxloc(abs(psi_coef(:,i)),1)
+!       pt2_E0_denominator(i) = psi_det_hii(j)
+!     enddo
+!   else if (h0_type == "Barycentric") then
+!     pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
+!   else if (h0_type == "CFG") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else
+!     print *,  h0_type, ' not implemented'
+!     stop
+!   endif
+!  do i=1,N_states
+!    call write_double(6,pt2_E0_denominator(i)+nuclear_repulsion, 'PT2 Energy denominator')
+!  enddo
+! else
+!   pt2_E0_denominator = -huge(1.d0)
+! endif
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Overlap between the perturbed wave functions
+ END_DOC
+ pt2_overlap(1:N_states,1:N_states) = 0.d0
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/environment.irp.f b/src/cipsi_tc_bi_ortho/environment.irp.f
new file mode 100644
index 00000000..5c0e0820
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/environment.irp.f
@@ -0,0 +1,14 @@
+BEGIN_PROVIDER [ integer, nthreads_pt2 ]
+ implicit none
+ BEGIN_DOC
+ ! Number of threads for Davidson
+ END_DOC
+ nthreads_pt2 = nproc
+ character*(32) :: env
+ call getenv('QP_NTHREADS_PT2',env)
+ if (trim(env) /= '') then
+   read(env,*) nthreads_pt2
+   call write_int(6,nthreads_pt2,'Target number of threads for PT2')
+ endif
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/get_d.irp.f b/src/cipsi_tc_bi_ortho/get_d.irp.f
new file mode 100644
index 00000000..c642f420
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/get_d.irp.f
@@ -0,0 +1,1735 @@
+
+! ---
+
+double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
+
+  use bitmasks
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer,           intent(in) :: s1, s2, h1, h2, p1, p2
+  integer(bit_kind), intent(in) :: phasemask(Nint,2)
+
+  double precision, save        :: res(0:1) = (/1d0, -1d0/)
+
+  integer                       :: np
+  integer                       :: h1_int, h2_int
+  integer                       :: p1_int, p2_int
+  integer                       :: h1_bit, h2_bit
+  integer                       :: p1_bit, p2_bit
+  logical                       :: change
+
+  h1_int = shiftr(h1-1,bit_kind_shift)+1
+  h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1
+
+  h2_int = shiftr(h2-1,bit_kind_shift)+1
+  h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1
+
+  p1_int = shiftr(p1-1,bit_kind_shift)+1
+  p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1
+
+  p2_int = shiftr(p2-1,bit_kind_shift)+1
+  p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1
+
+  ! Put the phasemask bits at position 0, and add them all
+  h1_bit = int( shiftr( phasemask(h1_int,s1), h1_bit ) )
+  p1_bit = int( shiftr( phasemask(p1_int,s1), p1_bit ) )
+  h2_bit = int( shiftr( phasemask(h2_int,s2), h2_bit ) )
+  p2_bit = int( shiftr( phasemask(p2_int,s2), p2_bit ) )
+
+  np = h1_bit + p1_bit + h2_bit + p2_bit
+
+  if(p1 < h1) np = np + 1
+  if(p2 < h2) np = np + 1
+
+  if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1
+  get_phase_bi = res(iand(np,1))
+
+end function get_phase_bi
+
+! ---
+
+subroutine get_d3_htc(gen, bannedOrb, banned, mat_m, mat_p, mask, p, sp, rcoefs, lcoefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+  integer,           intent(in)   :: p(0:4,2), sp
+  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision,  intent(in)   :: rcoefs(N_states), lcoefs(N_states)
+  double precision, intent(inout) :: mat_m(N_states, mo_num, mo_num), mat_p(N_states, mo_num, mo_num)
+
+  integer(bit_kind)               :: det(N_int, 2)
+  integer                         :: k, h1, h2, p1, p2, puti, putj
+  double precision                :: i_h_alpha, alpha_h_i 
+  logical                         :: ok
+
+  if(sp == 3) then ! AB
+
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1 = 1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2 = 1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, 1)) cycle ! rentable?
+
+        call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+        call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha)
+        call htilde_mu_mat_bi_ortho_tot(det,gen, N_int, alpha_h_i)
+!        call hji_hij_mu_mat_tot(gen, det, N_int,i_h_alpha , alpha_h_i)
+        if( dabs(alpha_h_i) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_p(k, p1, p2) = mat_p(k, p1, p2) + rcoefs(k) * alpha_h_i 
+          enddo
+        endif
+        if( dabs(i_h_alpha) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_m(k, p1, p2) = mat_m(k, p1, p2) + lcoefs(k) * i_h_alpha 
+          enddo
+        endif
+
+      enddo
+    enddo
+
+  else ! AA BB
+
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti = 1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj = puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, 1)) cycle ! rentable?
+
+        call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+!        call hji_hij_mu_mat_tot(gen, det, N_int, i_h_alpha, alpha_h_i)
+        call htilde_mu_mat_bi_ortho_tot(gen, det, N_int, i_h_alpha)
+        call htilde_mu_mat_bi_ortho_tot( det,gen, N_int, alpha_h_i)
+        if( dabs(alpha_h_i) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_p(k, puti, putj) = mat_p(k, puti, putj) + rcoefs(k) * alpha_h_i 
+          enddo
+        endif
+        if( dabs(i_h_alpha) .gt. 0.d0) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k = 1, N_states
+            mat_m(k, puti, putj) = mat_m(k, puti, putj) + lcoefs(k) * i_h_alpha 
+          enddo
+        endif
+
+      enddo
+    enddo
+
+  endif
+
+end subroutine get_d3_htc
+
+! ---
+
+subroutine get_d3_h(gen, bannedOrb, banned, mat, mask, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+  integer,           intent(in)   :: p(0:4,2), sp
+  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision,  intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+
+  integer(bit_kind)               :: det(N_int, 2)
+  integer                         :: k, h1, h2, p1, p2, puti, putj
+  double precision                :: hij
+  logical                         :: ok
+
+  if(sp == 3) then ! AB
+
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1 = 1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2 = 1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, 1)) cycle ! rentable?
+
+        call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        if (hij == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
+        enddo
+
+      enddo
+    enddo
+
+  else ! AA BB
+
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti = 1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj = puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, 1)) cycle ! rentable?
+
+        call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+        enddo
+
+      enddo
+    enddo
+
+  endif
+
+end subroutine get_d3_h
+
+! ---
+
+subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi, mo_two_e_integral
+
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: hij, phase
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2)
+
+  ma = sp
+  if(p(0,1) > p(0,2)) ma = 1
+  if(p(0,1) < p(0,2)) ma = 2
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then
+    if(ma == 2) bant = 2
+
+    if(tip == 3) then
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+
+      do i = 1, 3
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+
+        ! <p1 p2|1/r12|h1 h2> --> <p1 p2| w_ee^h + t^nh | h1 h2> --> < p2 p1 | H^tilde| h1 h2 >
+        ! 
+        !                      <p1 p2 | h1 h2>        -            <p2 p1 | h1 h2 >
+        ! < p2 p1 | H^tilde^dag| h1 h2 > = < h1 h2 | w_ee^h + t^nh | p1 p2 >
+        hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2, p1, h1, h2)
+        if (hij == 0.d0) cycle
+
+        hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        end if
+      end do
+    else
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+
+          hij = mo_two_e_integral(p1, p2, h1, h2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            enddo
+          endif
+        end do
+      end do
+    end if
+
+  else
+    if(tip == 0) then
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+          hij = mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)
+          if (hij == 0.d0) cycle
+
+          hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+          enddo
+        end do
+      end do
+    else if(tip == 3) then
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+        hij = mo_two_e_integral(p1, p2, h1, h2)
+        if (hij == 0.d0) cycle
+
+        hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, putj, puti) = mat(k, putj, puti) + coefs(k) * hij
+          enddo
+        endif
+      end do
+    else ! tip == 4
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2))
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+          enddo
+        end if
+      end if
+    end if
+  end if
+
+end subroutine get_d2
+
+! ---
+
+subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi, mo_two_e_integral
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+  double precision, allocatable :: hij_cache(:,:)
+  double precision               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+
+  PROVIDE mo_integrals_map N_int
+
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+      call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+      tmp_row = 0d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k,puti,l) = mat(k,puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row = 0d0
+    tmp_row2 = 0d0
+    call get_mo_two_e_integrals(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map)
+    call get_mo_two_e_integrals(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map)
+    putj = p1
+    do puti = 1, mo_num !HOT
+
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      endif
+
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+          enddo
+        endif
+      endif
+
+    enddo
+
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row(k,l)
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        call get_mo_two_e_integrals(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map)
+        call get_mo_two_e_integrals(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map)
+        tmp_row = 0d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat(k, puti, l) = mat(k, puti,l) + tmp_row(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row = 0d0
+      tmp_row2 = 0d0
+      call get_mo_two_e_integrals(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map)
+      call get_mo_two_e_integrals(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map)
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p2,l) = mat(k,p2,l) + tmp_row(k,l)
+        enddo
+      enddo
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k,p1,l) = mat(k,p1,l) + tmp_row2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1 = 1, p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2 = ib, p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        !DIR$ LOOP COUNT AVG(4)
+        do k = 1, N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij
+        enddo
+      enddo
+    enddo
+
+end subroutine get_d1
+
+! ---
+
+subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj
+  double precision :: hij, phase
+  double precision, external :: get_phase_bi, mo_two_e_integral
+  logical :: ok
+
+  integer, parameter :: bant=1
+  double precision, allocatable :: hij_cache1(:), hij_cache2(:)
+  allocate (hij_cache1(mo_num),hij_cache2(mo_num))
+
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      call get_mo_two_e_integrals(p1,h2,h1,mo_num,hij_cache1,mo_integrals_map)
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache1(p2) * phase
+        end if
+        if (hij == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij  ! HOTSPOT
+        enddo
+      end do
+    end do
+
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      call get_mo_two_e_integrals(puti,p2,p1,mo_num,hij_cache1,mo_integrals_map)
+      call get_mo_two_e_integrals(puti,p1,p2,mo_num,hij_cache2,mo_integrals_map)
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+          if (hij == 0.d0) cycle
+        else
+          hij = (mo_two_e_integral(p1, p2, puti, putj) -  mo_two_e_integral(p2, p1, puti, putj))
+          if (hij == 0.d0) cycle
+          hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+        enddo
+      end do
+    end do
+  end if
+
+  deallocate(hij_cache1,hij_cache2)
+
+end subroutine get_d0
+
+! ---
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm2(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!
+!  implicit none
+!
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  integer(bit_kind), intent(in)   :: mask(N_int, 2), gen(N_int, 2), phasemask(N_int,2)
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  integer, parameter              :: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+!  integer, parameter              :: turn2(2) = (/2, 1/)
+!  integer, parameter              :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+!
+!  integer                         :: i, j, k, tip, ma, mi, puti, putj
+!  integer                         :: h1, h2, p1, p2, i1, i2
+!  integer                         :: bant
+!  double precision                :: hij_p, hij_m, phase
+!
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int 
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  bant = 1
+!
+!  tip = p(0,1) * p(0,2)
+!
+!  ma = sp
+!  if(p(0,1) > p(0,2)) ma = 1
+!  if(p(0,1) < p(0,2)) ma = 2
+!  mi = mod(ma, 2) + 1
+!
+!  if(sp == 3) then
+!    if(ma == 2) bant = 2
+!    if(tip == 3) then
+!      puti = p(1, mi)
+!      if(bannedOrb(puti, mi)) return
+!      h1 = h(1, ma)
+!      h2 = h(2, ma)
+!
+!      do i = 1, 3
+!        putj = p(i, ma)
+!        if(banned(putj,puti,bant)) cycle
+!        i1 = turn3(1,i)
+!        i2 = turn3(2,i)
+!        p1 = p(i1, ma)
+!        p2 = p(i2, ma)
+!
+!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!        hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!        hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!
+!        if(ma == 1) then
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k = 1, N_states
+!            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p
+!            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m
+!          enddo
+!        else
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k = 1, N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end if
+!      end do
+!
+!    else
+!
+!      h1 = h(1,1)
+!      h2 = h(1,2)
+!      do j = 1,2
+!        putj = p(j, 2)
+!        if(bannedOrb(putj, 2)) cycle
+!        p2 = p(turn2(j), 2)
+!        do i = 1,2
+!          puti = p(i, 1)
+!
+!          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+!          p1 = p(turn2(i), 1)
+!
+!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k = 1, N_states
+!              mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!              mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!            enddo
+!          endif
+!        end do
+!      end do
+!    end if
+!
+!  else
+!    if(tip == 0) then
+!      h1 = h(1, ma)
+!      h2 = h(2, ma)
+!      do i=1,3
+!        puti = p(i, ma)
+!        if(bannedOrb(puti,ma)) cycle
+!        do j=i+1,4
+!          putj = p(j, ma)
+!          if(bannedOrb(putj,ma)) cycle
+!          if(banned(puti,putj,1)) cycle
+!
+!          i1 = turn2d(1, i, j)
+!          i2 = turn2d(2, i, j)
+!          p1 = p(i1, ma)
+!          p2 = p(i2, ma)
+!
+!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!                - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!                - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+!
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end do
+!      end do
+!
+!    else if(tip == 3) then
+!      h1 = h(1, mi)
+!      h2 = h(1, ma)
+!      p1 = p(1, mi)
+!      do i=1,3
+!        puti = p(turn3(1,i), ma)
+!        if(bannedOrb(puti,ma)) cycle
+!        putj = p(turn3(2,i), ma)
+!        if(bannedOrb(putj,ma)) cycle
+!        if(banned(puti,putj,1)) cycle
+!        p2 = p(i, ma)
+!
+!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.eq.0.d0) .and. (hij_m.eq.0.d0) ) cycle
+!
+!        hij_p = hij_p * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+!        hij_m = hij_m * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+!        if (puti < putj) then
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        else
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, putj, puti) = mat_p(k, putj, puti) + coefs(k) * hij_p
+!            mat_m(k, putj, puti) = mat_m(k, putj, puti) + coefs(k) * hij_m
+!          enddo
+!        endif
+!      end do
+!    else ! tip == 4
+!      puti = p(1, sp)
+!      putj = p(2, sp)
+!      if(.not. banned(puti,putj,1)) then
+!        p1 = p(1, mi)
+!        p2 = p(2, mi)
+!        h1 = h(1, mi)
+!        h2 = h(2, mi)
+!
+!        hij_p = get_mo_two_e_integral_tc_int   (p1, p2, h1, h2, mo_integrals_tc_int_map   ) &
+!              - get_mo_two_e_integral_tc_int   (p2, p1, h1, h2, mo_integrals_tc_int_map   )
+!        hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, h1, h2, mo_integrals_tcdag_int_map) &
+!              - get_mo_two_e_integral_tcdag_int(p2, p1, h1, h2, mo_integrals_tcdag_int_map)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!            mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!          enddo
+!        end if
+!      end if
+!    end if
+!  end if
+!
+!end subroutine get_pm2
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm1(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!
+!  implicit none
+!
+!  integer(bit_kind)               :: det(N_int, 2)
+!  integer(bit_kind), intent(in)   :: mask(N_int, 2), gen(N_int, 2)
+!  integer(bit_kind), intent(in)   :: phasemask(N_int,2)
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  double precision, external      :: get_phase_bi
+!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int
+!
+!  logical                         :: ok
+!  logical, allocatable            :: lbanned(:,:)
+!  integer                         :: bant
+!  integer                         :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+!  integer                         :: hfix, pfix, h1, h2, p1, p2, ib, k, l
+!  double precision                :: tmp_row_p (N_states, mo_num), tmp_row_m (N_states, mo_num)
+!  double precision                :: hij_p, hij_m, tmp_row2_p(N_states, mo_num), tmp_row2_m(N_states, mo_num)
+!  double precision, allocatable   :: hijp_cache(:,:), hijm_cache(:,:)
+!
+!  integer, parameter              :: turn2(2) = (/2,1/)
+!  integer, parameter              :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  allocate( lbanned(mo_num, 2) )
+!  allocate( hijp_cache(mo_num,2), hijm_cache(mo_num,2) )
+!  lbanned = bannedOrb
+!
+!  do i=1, p(0,1)
+!    lbanned(p(i,1), 1) = .true.
+!  end do
+!  do i=1, p(0,2)
+!    lbanned(p(i,2), 2) = .true.
+!  end do
+!
+!  ma = 1
+!  if(p(0,2) >= 2) ma = 2
+!  mi = turn2(ma)
+!
+!  bant = 1
+!
+!  if(sp == 3) then
+!    !move MA
+!    if(ma == 2) bant = 2
+!    puti = p(1,mi)
+!    hfix = h(1,ma)
+!    p1 = p(1,ma)
+!    p2 = p(2,ma)
+!    if(.not. bannedOrb(puti, mi)) then
+!
+!      call get_mo_two_e_integrals_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!      tmp_row_p = 0d0
+!      tmp_row_m = 0d0
+!      do putj=1, hfix-1
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!
+!        hij_p = hijp_cache(putj,1) - hijp_cache(putj,2)
+!        hij_m = hijm_cache(putj,1) - hijm_cache(putj,2)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end do
+!      do putj=hfix+1, mo_num
+!        if(lbanned(putj, ma)) cycle
+!        if(banned(putj, puti,bant)) cycle
+!
+!        hij_p = hijp_cache(putj,2) - hijp_cache(putj,1)
+!        hij_m = hijm_cache(putj,2) - hijm_cache(putj,1)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_p(k,putj) = tmp_row_p(k,putj) + hij_p * coefs(k)
+!            tmp_row_m(k,putj) = tmp_row_m(k,putj) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end do
+!
+!      if(ma == 1) then
+!        mat_p(1:N_states,1:mo_num,puti) = mat_p(1:N_states,1:mo_num,puti) + tmp_row_p(1:N_states,1:mo_num)
+!        mat_m(1:N_states,1:mo_num,puti) = mat_m(1:N_states,1:mo_num,puti) + tmp_row_m(1:N_states,1:mo_num)
+!      else
+!        do l=1,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k,puti,l) = mat_p(k,puti,l) + tmp_row_p(k,l)
+!            mat_m(k,puti,l) = mat_m(k,puti,l) + tmp_row_m(k,l)
+!          enddo
+!        enddo
+!      end if
+!    end if
+!
+!    !MOVE MI
+!    pfix = p(1,mi)
+!    tmp_row_p  = 0d0
+!    tmp_row_m  = 0d0
+!    tmp_row2_p = 0d0
+!    tmp_row2_m = 0d0
+!
+!    call get_mo_two_e_integrals_tc_int   (hfix, pfix, p1, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!    call get_mo_two_e_integrals_tc_int   (hfix, pfix, p2, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!    call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p1, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!    call get_mo_two_e_integrals_tcdag_int(hfix, pfix, p2, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!    putj = p1
+!    do puti=1,mo_num !HOT
+!      if(lbanned(puti,mi)) cycle
+!      !p1 fixed
+!      putj = p1
+!      if(.not. banned(putj,puti,bant)) then
+!
+!        hij_p = hijp_cache(puti,2)
+!        hij_m = hijm_cache(puti,2)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k)
+!            tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end if
+!
+!      putj = p2
+!      if(.not. banned(putj,puti,bant)) then
+!
+!        hij_p = hijp_cache(puti,1)
+!        hij_m = hijm_cache(puti,1)
+!
+!        if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!          hij_p = hij_p * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+!          do k=1,N_states
+!            tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k)
+!            tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k)
+!          enddo
+!        endif
+!      end if
+!    end do
+!
+!    if(mi == 1) then
+!      mat_p(:,:,p1) = mat_p(:,:,p1) + tmp_row_p (:,:)
+!      mat_p(:,:,p2) = mat_p(:,:,p2) + tmp_row2_p(:,:)
+!      mat_m(:,:,p1) = mat_m(:,:,p1) + tmp_row_m (:,:)
+!      mat_m(:,:,p2) = mat_m(:,:,p2) + tmp_row2_m(:,:)
+!    else
+!      do l=1,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row_p (k,l)
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row_m (k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row2_m(k,l)
+!        enddo
+!      enddo
+!    end if
+!
+!  else  ! sp /= 3
+!
+!    if(p(0,ma) == 3) then
+!      do i=1,3
+!        hfix = h(1,ma)
+!        puti = p(i, ma)
+!        p1 = p(turn3(1,i), ma)
+!        p2 = p(turn3(2,i), ma)
+!
+!        call get_mo_two_e_integrals_tc_int   (hfix, p1, p2, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!        call get_mo_two_e_integrals_tc_int   (hfix, p2, p1, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!        call get_mo_two_e_integrals_tcdag_int(hfix, p1, p2, mo_num, hijm_cache(1,1), mo_integrals_tcdag_int_map)
+!        call get_mo_two_e_integrals_tcdag_int(hfix, p2, p1, mo_num, hijm_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!        tmp_row_p = 0d0
+!        tmp_row_m = 0d0
+!        do putj=1,hfix-1
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!
+!          hij_p = hijp_cache(putj,1) - hijp_cache(putj,2)
+!          hij_m = hijm_cache(putj,1) - hijm_cache(putj,2)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+!            tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:)
+!          endif
+!        end do
+!        do putj=hfix+1,mo_num
+!          if(banned(putj,puti,1)) cycle
+!          if(lbanned(putj,ma)) cycle
+!
+!          hij_p = hijp_cache(putj,2) - hijp_cache(putj,1)
+!          hij_m = hijm_cache(putj,2) - hijm_cache(putj,1)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+!            tmp_row_p(:,putj) = tmp_row_p(:,putj) + hij_p * coefs(:)
+!            tmp_row_m(:,putj) = tmp_row_m(:,putj) + hij_m * coefs(:)
+!          endif
+!        end do
+!
+!        mat_p(:, :puti-1, puti) = mat_p(:, :puti-1, puti) + tmp_row_p(:,:puti-1)
+!        mat_m(:, :puti-1, puti) = mat_m(:, :puti-1, puti) + tmp_row_m(:,:puti-1)
+!        do l=puti,mo_num
+!          !DIR$ LOOP COUNT AVG(4)
+!          do k=1,N_states
+!            mat_p(k, puti, l) = mat_p(k, puti,l) + tmp_row_p(k,l)
+!            mat_m(k, puti, l) = mat_m(k, puti,l) + tmp_row_m(k,l)
+!          enddo
+!        enddo
+!      end do
+!    else
+!      hfix = h(1,mi)
+!      pfix = p(1,mi)
+!      p1 = p(1,ma)
+!      p2 = p(2,ma)
+!      tmp_row_p  = 0d0
+!      tmp_row_m  = 0d0
+!      tmp_row2_p = 0d0
+!      tmp_row2_m = 0d0
+!
+!      call get_mo_two_e_integrals_tc_int   (hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tc_int   (hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tcdag_int(hfix, p1, pfix, mo_num, hijp_cache(1,1), mo_integrals_tcdag_int_map)
+!      call get_mo_two_e_integrals_tcdag_int(hfix, p2, pfix, mo_num, hijp_cache(1,2), mo_integrals_tcdag_int_map)
+!
+!      putj = p2
+!      do puti=1,mo_num
+!        if(lbanned(puti,ma)) cycle
+!        putj = p2
+!        if(.not. banned(puti,putj,1)) then
+!
+!          hij_p = hijp_cache(puti,1)
+!          hij_m = hijm_cache(puti,1)
+!
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+!            !DIR$ LOOP COUNT AVG(4)
+!            do k=1,N_states
+!              tmp_row_p(k,puti) = tmp_row_p(k,puti) + hij_p * coefs(k)
+!              tmp_row_m(k,puti) = tmp_row_m(k,puti) + hij_m * coefs(k)
+!            enddo
+!          endif
+!        end if
+!
+!        putj = p1
+!        if(.not. banned(puti,putj,1)) then
+!          hij_p = hijp_cache(puti,2)
+!          hij_m = hijm_cache(puti,2)
+!          if( (hij_p.ne.0.d0) .and. (hij_m.ne.0.d0) ) then
+!            hij_p = hij_p * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+!            hij_m = hij_m * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+!            do k=1,N_states
+!              tmp_row2_p(k,puti) = tmp_row2_p(k,puti) + hij_p * coefs(k)
+!              tmp_row2_m(k,puti) = tmp_row2_m(k,puti) + hij_m * coefs(k)
+!            enddo
+!          endif
+!        end if
+!      end do
+!      mat_p(:,:p2-1,p2) = mat_p(:,:p2-1,p2) + tmp_row_p(:,:p2-1)
+!      mat_m(:,:p2-1,p2) = mat_m(:,:p2-1,p2) + tmp_row_m(:,:p2-1)
+!      do l=p2,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p2,l) = mat_p(k,p2,l) + tmp_row_p(k,l)
+!          mat_m(k,p2,l) = mat_m(k,p2,l) + tmp_row_m(k,l)
+!        enddo
+!      enddo
+!      mat_p(:,:p1-1,p1) = mat_p(:,:p1-1,p1) + tmp_row2_p(:,:p1-1)
+!      mat_m(:,:p1-1,p1) = mat_m(:,:p1-1,p1) + tmp_row2_m(:,:p1-1)
+!      do l=p1,mo_num
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k,p1,l) = mat_p(k,p1,l) + tmp_row2_p(k,l)
+!          mat_m(k,p1,l) = mat_m(k,p1,l) + tmp_row2_m(k,l)
+!        enddo
+!      enddo
+!    end if
+!  end if
+!  deallocate(lbanned,hijp_cache, hijm_cache)
+!
+! !! MONO
+!  if(sp == 3) then
+!    s1 = 1
+!    s2 = 2
+!  else
+!    s1 = sp
+!    s2 = sp
+!  end if
+!
+!  do i1 = 1, p(0,s1)
+!    ib = 1
+!    if(s1 == s2) ib = i1+1
+!    do i2 = ib, p(0,s2)
+!      p1 = p(i1,s1)
+!      p2 = p(i2,s2)
+!      if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+!      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+!
+!      call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!      call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!
+!      !DIR$ LOOP COUNT AVG(4)
+!      do k = 1, N_states
+!        mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p
+!        mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m
+!      enddo
+!    enddo
+!  enddo
+!
+!end subroutine get_pm1
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+!subroutine get_pm0(gen, phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, coefs)
+!
+!  use bitmasks
+!  implicit none
+!
+!  integer(bit_kind)               :: det(N_int, 2)
+!  integer(bit_kind), intent(in)   :: gen(N_int, 2), mask(N_int, 2)
+!  integer(bit_kind), intent(in)   :: phasemask(N_int,2)
+!  integer,           intent(in)   :: h(0:2,2), p(0:4,2), sp
+!  logical,           intent(in)   :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+!  double precision,  intent(in)   :: coefs(N_states)
+!  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+!
+!  double precision, external      :: get_phase_bi, mo_two_e_integral
+!  double precision, external      :: get_mo_two_e_integral_tc_int, get_mo_two_e_integral_tcdag_int 
+!  integer, parameter              :: bant=1
+!  integer                         :: i, j, k, s, h1, h2, p1, p2, puti, putj
+!  logical                         :: ok
+!  double precision                :: hij_p, hij_m, phase
+!  double precision, allocatable   :: hijp_cache1(:), hijp_cache2(:), hijm_cache1(:), hijm_cache2(:)
+!
+!  PROVIDE mo_integrals_tc_int_map mo_integrals_tcdag_int_map
+!
+!  allocate( hijp_cache1(mo_num) , hijp_cache2(mo_num) )
+!  allocate( hijm_cache1(mo_num) , hijm_cache2(mo_num) )
+!
+!  if(sp == 3) then ! AB
+!    h1 = p(1,1)
+!    h2 = p(1,2)
+!    do p1=1, mo_num
+!      if(bannedOrb(p1, 1)) cycle
+!
+!      call get_mo_two_e_integrals_tc_int   (p1, h2, h1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tcdag_int(p1, h2, h1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!
+!      do p2 = 1, mo_num
+!        if(bannedOrb(p2,2)) cycle
+!        if(banned(p1, p2, bant)) cycle ! rentable?
+!        if(p1 == h1 .or. p2 == h2) then
+!          call apply_particles(mask, 1, p1, 2, p2, det, ok, N_int)
+!          call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!          call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!        else
+!          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+!          hij_p = hijp_cache1(p2) * phase
+!          hij_m = hijm_cache1(p2) * phase
+!        end if
+!        if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k = 1, N_states
+!          mat_p(k, p1, p2) = mat_p(k, p1, p2) + coefs(k) * hij_p  ! HOTSPOT
+!          mat_m(k, p1, p2) = mat_m(k, p1, p2) + coefs(k) * hij_m  ! HOTSPOT
+!        enddo
+!      end do
+!    end do
+!
+!  else ! AA BB
+!    p1 = p(1,sp)
+!    p2 = p(2,sp)
+!    do puti=1, mo_num
+!      if(bannedOrb(puti, sp)) cycle
+!
+!      call get_mo_two_e_integrals_tc_int   (puti, p2, p1, mo_num, hijp_cache1, mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tc_int   (puti, p1, p2, mo_num, hijp_cache2, mo_integrals_tc_int_map   )
+!      call get_mo_two_e_integrals_tcdag_int(puti, p2, p1, mo_num, hijm_cache1, mo_integrals_tcdag_int_map)
+!      call get_mo_two_e_integrals_tcdag_int(puti, p1, p2, mo_num, hijm_cache2, mo_integrals_tcdag_int_map)
+!
+!      do putj=puti+1, mo_num
+!        if(bannedOrb(putj, sp)) cycle
+!        if(banned(puti, putj, bant)) cycle ! rentable?
+!        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+!          call apply_particles(mask, sp, puti, sp, putj, det, ok, N_int)
+!          call htilde_mu_mat_tot   (gen, det, N_int, hij_p)
+!          call htildedag_mu_mat_tot(gen, det, N_int, hij_m)
+!          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!        else
+!
+!          hij_p = get_mo_two_e_integral_tc_int   (p1, p2, puti, putj, mo_integrals_tc_int_map   ) &
+!                - get_mo_two_e_integral_tc_int   (p2, p1, puti, putj, mo_integrals_tc_int_map   )
+!          hij_m = get_mo_two_e_integral_tcdag_int(p1, p2, puti, putj, mo_integrals_tcdag_int_map) &
+!                - get_mo_two_e_integral_tcdag_int(p2, p1, puti, putj, mo_integrals_tcdag_int_map)
+!
+!          if( (hij_p.eq.0.d0).and.(hij_m.eq.0.d0) ) cycle
+!
+!          hij_p = hij_p * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+!          hij_m = hij_m * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+!
+!        end if
+!        !DIR$ LOOP COUNT AVG(4)
+!        do k=1,N_states
+!          mat_p(k, puti, putj) = mat_p(k, puti, putj) + coefs(k) * hij_p
+!          mat_m(k, puti, putj) = mat_m(k, puti, putj) + coefs(k) * hij_m
+!        enddo
+!      end do
+!    end do
+!  end if
+!
+!  deallocate( hijp_cache1 , hijp_cache2 )
+!  deallocate( hijm_cache1 , hijm_cache2 )
+!
+!end subroutine get_pm0
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+! ___________________________________________________________________________________________________________________________________________________
+
+
+! OLD unoptimized routines for debugging
+! ======================================
+
+subroutine get_d0_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: i, j, s, h1, h2, p1, p2, puti, putj
+  double precision :: hij, phase
+  double precision, external :: get_phase_bi, mo_two_e_integral
+  logical :: ok
+
+  integer :: bant
+  bant = 1
+
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = mo_two_e_integral(p1, p2, h1, h2) * phase
+        end if
+        mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
+      end do
+    end do
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          hij = (mo_two_e_integral(p1, p2, puti, putj) -  mo_two_e_integral(p2, p1, puti, putj))* get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end do
+    end do
+  end if
+
+end subroutine get_d0_reference
+
+! ---
+
+subroutine get_d1_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision               :: hij, tmp_row(N_states, mo_num), tmp_row2(N_states, mo_num)
+  double precision, external     :: get_phase_bi, mo_two_e_integral
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer                        :: bant
+
+
+  allocate (lbanned(mo_num, 2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      tmp_row = 0d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
+        hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+        tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma) .or. banned(putj, puti,bant)) cycle
+        hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+        tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
+      end do
+
+      if(ma == 1) then
+        mat(1:N_states,1:mo_num,puti) = mat(1:N_states,1:mo_num,puti) + tmp_row(1:N_states,1:mo_num)
+      else
+        mat(1:N_states,puti,1:mo_num) = mat(1:N_states,puti,1:mo_num) + tmp_row(1:N_states,1:mo_num)
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row = 0d0
+    tmp_row2 = 0d0
+    do puti=1,mo_num
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = mo_two_e_integral(p2,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+        tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:)
+      end if
+
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = mo_two_e_integral(p1,pfix,hfix,puti) * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+        tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:)
+      end if
+    end do
+
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+    else
+      mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:)
+      mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:)
+    end if
+  else
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        tmp_row = 0d0
+        do putj=1,hfix-1
+          if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
+          hij = (mo_two_e_integral(p1, p2, putj, hfix)-mo_two_e_integral(p2,p1,putj,hfix)) * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+        end do
+        do putj=hfix+1,mo_num
+          if(lbanned(putj,ma) .or. banned(puti,putj,1)) cycle
+          hij = (mo_two_e_integral(p1, p2, hfix, putj)-mo_two_e_integral(p2,p1,hfix,putj)) * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        mat(:, puti, puti:) = mat(:, puti, puti:) + tmp_row(:,puti:)
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row = 0d0
+      tmp_row2 = 0d0
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = mo_two_e_integral(pfix, p1, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+          tmp_row(:,puti) = tmp_row(:,puti) + hij * coefs(:)
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = mo_two_e_integral(pfix, p2, hfix, puti) * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+          tmp_row2(:,puti) = tmp_row2(:,puti) + hij * coefs(:)
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:)
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:)
+    end if
+  end if
+  deallocate(lbanned)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
+      end do
+    end do
+
+end subroutine get_d1_reference
+
+! ---
+
+subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(2,N_int)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi, mo_two_e_integral
+
+  integer :: i, j, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: hij, phase
+
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2)
+
+  ma = sp
+  if(p(0,1) > p(0,2)) ma = 1
+  if(p(0,1) < p(0,2)) ma = 2
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then
+    if(ma == 2) bant = 2
+
+    if(tip == 3) then
+      puti = p(1, mi)
+      do i = 1, 3
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        h1 = h(1, ma)
+        h2 = h(2, ma)
+
+        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        if(ma == 1) then
+          mat(:, putj, puti) = mat(:, putj, puti) + coefs(:) * hij
+        else
+          mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+        end if
+      end do
+    else
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2
+        putj = p(j, 2)
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+
+          if(banned(puti,putj,bant)) cycle
+          p1 = p(turn2(i), 1)
+
+          hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2,N_int)
+          mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+        end do
+      end do
+    end if
+
+  else
+    if(tip == 0) then
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+      puti = p(i, ma)
+      do j=i+1,4
+        putj = p(j, ma)
+        if(banned(puti,putj,1)) cycle
+
+        i1 = turn2d(1, i, j)
+        i2 = turn2d(2, i, j)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2,N_int)
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end do
+      end do
+    else if(tip == 3) then
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        putj = p(turn3(2,i), ma)
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+        hij = mo_two_e_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2,N_int)
+        mat(:, min(puti, putj), max(puti, putj)) = mat(:, min(puti, putj), max(puti, putj)) + coefs(:) * hij
+      end do
+    else ! tip == 4
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_two_e_integral(p1, p2, h1, h2) - mo_two_e_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2,N_int)
+        mat(:, puti, putj) = mat(:, puti, putj) + coefs(:) * hij
+      end if
+    end if
+  end if
+
+end subroutine get_d2_reference(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+
+! ---
+
diff --git a/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f b/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f
new file mode 100644
index 00000000..e69de29b
diff --git a/src/cipsi_tc_bi_ortho/pt2.irp.f b/src/cipsi_tc_bi_ortho/pt2.irp.f
new file mode 100644
index 00000000..e7dca456
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2.irp.f
@@ -0,0 +1,89 @@
+subroutine pt2_tc_bi_ortho
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Selected Full Configuration Interaction with Stochastic selection and PT2.
+  END_DOC
+  integer                        :: i,j,k,ndet
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+  logical                        :: print_pt2
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+  double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:)
+  PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  endif
+
+  allocate(ept2(1000),pt1(1000),extrap_energy(100))
+
+  correlation_energy_ratio = 0.d0
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+  to_select = max(N_states_diag, to_select)
+
+  E_denom = E_tc ! TC Energy of the current wave function 
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+
+  N_iter += 1
+
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+end
+
diff --git a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
new file mode 100644
index 00000000..56e6bd14
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
@@ -0,0 +1,869 @@
+BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
+ implicit none
+ BEGIN_DOC
+ ! State for stochatsic PT2
+ END_DOC
+ pt2_stoch_istate = 1
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ]
+&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ]
+  implicit none
+  logical, external :: testTeethBuilding
+  integer :: i,j
+  pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num  - n_core_orb*2
+  pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000)
+  call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max')
+
+  pt2_F(:) = max(int(sqrt(float(pt2_n_tasks_max))),1)
+  do i=1,pt2_n_0(1+pt2_N_teeth/4)
+    pt2_F(i) = pt2_n_tasks_max*pt2_min_parallel_tasks
+  enddo
+  do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/4), pt2_n_0(pt2_N_teeth-pt2_N_teeth/10)
+    pt2_F(i) = pt2_min_parallel_tasks
+  enddo
+  do i=1+pt2_n_0(pt2_N_teeth-pt2_N_teeth/10), N_det_generators
+    pt2_F(i) = 1
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_N_teeth ]
+&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ]
+  implicit none
+  logical, external :: testTeethBuilding
+
+  if(N_det_generators < 500) then
+    pt2_minDetInFirstTeeth = 1
+    pt2_N_teeth = 1
+  else
+    pt2_minDetInFirstTeeth = min(5, N_det_generators)
+    do pt2_N_teeth=100,2,-1
+      if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
+    end do
+  end if
+  call write_int(6,pt2_N_teeth,'Number of comb teeth')
+END_PROVIDER
+
+
+logical function testTeethBuilding(minF, N)
+  implicit none
+  integer, intent(in) :: minF, N
+  integer :: n0, i
+  double precision :: u0, Wt, r
+
+  double precision, allocatable :: tilde_w(:), tilde_cW(:)
+  integer, external :: dress_find_sample
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  rss = memory_of_double(2*N_det_generators+1)
+  call check_mem(rss,irp_here)
+
+  allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+
+  double precision :: norm2
+  norm2 = 0.d0
+  do i=N_det_generators,1,-1
+    tilde_w(i)  = psi_coef_sorted_tc_gen(i,pt2_stoch_istate) * &
+                  psi_coef_sorted_tc_gen(i,pt2_stoch_istate)
+    norm2 = norm2 + tilde_w(i)
+  enddo
+
+  f = 1.d0/norm2
+  tilde_w(:) = tilde_w(:) * f
+
+  tilde_cW(0) = -1.d0
+  do i=1,N_det_generators
+    tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+  enddo
+  tilde_cW(:) = tilde_cW(:) + 1.d0
+  deallocate(tilde_w)
+
+  n0 = 0
+  testTeethBuilding = .false.
+  double precision :: f
+  integer :: minFN
+  minFN = N_det_generators - minF * N
+  f = 1.d0/dble(N)
+  do
+    u0 = tilde_cW(n0)
+    r = tilde_cW(n0 + minF)
+    Wt = (1d0 - u0) * f
+    if (dabs(Wt) <= 1.d-3) then
+      exit
+    endif
+    if(Wt >= r - u0) then
+       testTeethBuilding = .true.
+       exit
+    end if
+    n0 += 1
+    if(n0 > minFN) then
+      exit
+    end if
+  end do
+  deallocate(tilde_cW)
+
+end function
+
+
+
+subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket, zmq_socket_pull
+  integer, intent(in)            :: N_in
+!  integer, intent(inout)         :: N_in
+  double precision, intent(in)   :: relative_error, E(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+!
+  integer                        :: i, N
+
+  double precision               :: state_average_weight_save(N_states), w(N_states,4)
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
+  type(selection_buffer)         :: b
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc
+  PROVIDE psi_det_hii selection_weight pseudo_sym
+  PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+  PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+  if (h0_type == 'CFG') then
+    PROVIDE psi_configuration_hii det_to_configuration
+  endif
+
+  if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then
+    print*,'ZMQ_selection'
+    call ZMQ_selection(N_in, pt2_data)
+  else
+    print*,'else ZMQ_selection'
+
+    N = max(N_in,1) * N_states
+    state_average_weight_save(:) = state_average_weight(:)
+    if (int(N,8)*2_8 > huge(1)) then
+      print *,  irp_here, ': integer too large'
+      stop -1
+    endif
+    call create_selection_buffer(N, N*2, b)
+    ASSERT (associated(b%det))
+    ASSERT (associated(b%val))
+
+    do pt2_stoch_istate=1,N_states
+      state_average_weight(:) = 0.d0
+      state_average_weight(pt2_stoch_istate) = 1.d0
+      TOUCH state_average_weight pt2_stoch_istate selection_weight
+
+      PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w
+      PROVIDE psi_selectors pt2_u pt2_J pt2_R
+      call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+
+      integer, external              :: zmq_put_psi
+      integer, external              :: zmq_put_N_det_generators
+      integer, external              :: zmq_put_N_det_selectors
+      integer, external              :: zmq_put_dvector
+      integer, external              :: zmq_put_ivector
+      if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+        stop 'Unable to put psi on ZMQ server'
+      endif
+      if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_generators on ZMQ server'
+      endif
+      if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_selectors on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+        stop 'Unable to put energy on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+        stop 'Unable to put state_average_weight on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+        stop 'Unable to put selection_weight on ZMQ server'
+      endif
+      if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then
+        stop 'Unable to put pt2_stoch_istate on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+        stop 'Unable to put threshold_generators on ZMQ server'
+      endif
+
+
+      integer, external :: add_task_to_taskserver
+      character(300000) :: task
+
+      integer :: j,k,ipos,ifirst
+      ifirst=0
+
+      ipos=0
+      do i=1,N_det_generators
+        if (pt2_F(i) > 1) then
+          ipos += 1
+        endif
+      enddo
+      call write_int(6,sum(pt2_F),'Number of tasks')
+      call write_int(6,ipos,'Number of fragmented tasks')
+
+      ipos=1
+      do i= 1, N_det_generators
+        do j=1,pt2_F(pt2_J(i))
+          write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N_in
+          ipos += 30
+          if (ipos > 300000-30) then
+            if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+              stop 'Unable to add task to task server'
+            endif
+            ipos=1
+            if (ifirst == 0) then
+              ifirst=1
+              if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+                print *,  irp_here, ': Failed in zmq_set_running'
+              endif
+            endif
+          endif
+        end do
+      enddo
+      if (ipos > 1) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+      endif
+
+      integer, external :: zmq_set_running
+      if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+        print *,  irp_here, ': Failed in zmq_set_running'
+      endif
+
+
+      double precision :: mem_collector, mem, rss
+
+      call resident_memory(rss)
+
+      mem_collector = 8.d0 *                  & ! bytes
+            ( 1.d0*pt2_n_tasks_max            & ! task_id, index
+            + 0.635d0*N_det_generators        & ! f,d
+            + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task
+            + N_det_generators*pt2_type_size(N_states)  & ! pt2_data_I
+            + 4.d0*(pt2_N_teeth+1)            & ! S, S2, T2, T3
+            + 1.d0*(N_int*2.d0*N + N)         & ! selection buffer
+            + 1.d0*(N_int*2.d0*N + N)         & ! sort selection buffer
+            ) / 1024.d0**3
+
+      integer :: nproc_target, ii
+      nproc_target = nthreads_pt2
+      ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+      do
+        mem = mem_collector +                   & !
+              nproc_target * 8.d0 *             & ! bytes
+              ( 0.5d0*pt2_n_tasks_max           & ! task_id
+              + 64.d0*pt2_n_tasks_max           & ! task
+              + pt2_type_size(N_states)*pt2_n_tasks_max*N_states   & ! pt2, variance, overlap
+              + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+              + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                  ! prefullinteresting, fullinteresting
+              + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+              + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+              ) / 1024.d0**3
+
+        if (nproc_target == 0) then
+          call check_mem(mem,irp_here)
+          nproc_target = 1
+          exit
+        endif
+
+        if (mem+rss < qp_max_mem) then
+          exit
+        endif
+
+        nproc_target = nproc_target - 1
+
+      enddo
+      call write_int(6,nproc_target,'Number of threads for PT2')
+      call write_double(6,mem,'Memory (Gb)')
+
+      call omp_set_max_active_levels(1)
+
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', ' Samples          Energy                Variance               Norm^2          Seconds'
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+      PROVIDE global_selection_buffer
+
+      !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1)            &
+          !$OMP  PRIVATE(i)
+      i = omp_get_thread_num()
+      if (i==0) then
+
+        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N)
+        pt2_data % rpt2(pt2_stoch_istate) =  &
+          pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+        !TODO : We should use here the correct formula for the error of X/Y
+        pt2_data_err % rpt2(pt2_stoch_istate) =  &
+          pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+      else
+        call pt2_slave_inproc(i)
+      endif
+      !$OMP END PARALLEL
+      call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+      call omp_set_max_active_levels(8)
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+    do k=1,N_states
+      pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
+    enddo
+    SOFT_TOUCH pt2_overlap
+
+    enddo
+    FREE pt2_stoch_istate
+
+    ! Symmetrize overlap
+    do j=2,N_states
+     do i=1,j-1
+       pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i))
+       pt2_overlap(j,i) = pt2_overlap(i,j)
+     enddo
+    enddo
+
+    print *, 'Overlap of perturbed states:'
+    do k=1,N_states
+      print *, pt2_overlap(k,:)
+    enddo
+    print *, '-------'
+
+    if (N_in > 0) then
+      b%cur = min(N_in,b%cur)
+      if (s2_eig) then
+        call make_selection_buffer_s2(b)
+      else
+        call remove_duplicates_in_selection_buffer(b)
+      endif
+      call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+    endif
+    call delete_selection_buffer(b)
+
+    state_average_weight(:) = state_average_weight_save(:)
+    TOUCH state_average_weight
+    call update_pt2_and_variance_weights(pt2_data, N_states)
+  endif
+
+
+end subroutine
+
+
+subroutine pt2_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  PROVIDE global_selection_buffer
+  call run_pt2_slave(1,i,pt2_e0_denominator)
+end
+
+
+subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  double precision, intent(in)   :: relative_error, E
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in)            :: N_
+
+  type(pt2_type), allocatable    :: pt2_data_task(:)
+  type(pt2_type), allocatable    :: pt2_data_I(:)
+  type(pt2_type), allocatable    :: pt2_data_S(:)
+  type(pt2_type), allocatable    :: pt2_data_S2(:)
+  type(pt2_type)                 :: pt2_data_teeth
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer, external :: zmq_delete_tasks_async_send
+  integer, external :: zmq_delete_tasks_async_recv
+  integer, external :: zmq_abort
+  integer, external :: pt2_find_sample_lr
+
+  PROVIDE pt2_stoch_istate
+
+  integer :: more, n, i, p, c, t, n_tasks, U
+  integer, allocatable :: task_id(:)
+  integer, allocatable :: index(:)
+
+  double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states)
+  double precision :: eqta(N_states)
+  double precision :: time, time1, time0
+
+  integer, allocatable :: f(:)
+  logical, allocatable :: d(:)
+  logical :: do_exit, stop_now, sending
+  logical, external :: qp_stop
+  type(selection_buffer) :: b2
+
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  sending =.False.
+
+  rss  = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
+  rss += memory_of_double(N_states*N_det_generators)*3.d0
+  rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0
+  rss += memory_of_double(pt2_N_teeth+1)*4.d0
+  call check_mem(rss,irp_here)
+
+  ! If an allocation is added here, the estimate of the memory should also be
+  ! updated in ZMQ_pt2
+  allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
+  allocate(d(N_det_generators+1))
+  allocate(pt2_data_task(pt2_n_tasks_max))
+  allocate(pt2_data_I(N_det_generators))
+  allocate(pt2_data_S(pt2_N_teeth+1))
+  allocate(pt2_data_S2(pt2_N_teeth+1))
+
+
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N_, N_*2, b2)
+
+
+  pt2_data % pt2(pt2_stoch_istate) = -huge(1.)
+  pt2_data_err % pt2(pt2_stoch_istate) = huge(1.)
+  pt2_data % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data_err % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data % overlap(:,pt2_stoch_istate) = 0.d0
+  pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.)
+  n = 1
+  t = 0
+  U = 0
+  do i=1,pt2_n_tasks_max
+    call pt2_alloc(pt2_data_task(i),N_states)
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_alloc(pt2_data_S(i),N_states)
+    call pt2_alloc(pt2_data_S2(i),N_states)
+  enddo
+  do i=1,N_det_generators
+    call pt2_alloc(pt2_data_I(i),N_states)
+  enddo
+  f(:) = pt2_F(:)
+  d(:) = .false.
+  n_tasks = 0
+  E0 = E
+  v0 = 0.d0
+  n0(:) = 0.d0
+  more = 1
+  call wall_time(time0)
+  time1 = time0
+
+  do_exit = .false.
+  stop_now = .false.
+  do while (n <= N_det_generators)
+    if(f(pt2_J(n)) == 0) then
+      d(pt2_J(n)) = .true.
+      do while(d(U+1))
+        U += 1
+      end do
+
+      ! Deterministic part
+      do while(t <= pt2_N_teeth)
+        if(U >= pt2_n_0(t+1)) then
+          t=t+1
+          E0 = 0.d0
+          v0 = 0.d0
+          n0(:) = 0.d0
+          do i=pt2_n_0(t),1,-1
+            E0 += pt2_data_I(i) % pt2(pt2_stoch_istate)
+            v0 += pt2_data_I(i) % variance(pt2_stoch_istate)
+            n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate)
+          end do
+        else
+          exit
+        end if
+      end do
+
+      ! Add Stochastic part
+      c = pt2_R(n)
+      if(c > 0) then
+
+        call pt2_alloc(pt2_data_teeth,N_states)
+        do p=pt2_N_teeth, 1, -1
+          v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
+          i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
+          v = pt2_W_T / pt2_w(i)
+          call pt2_add ( pt2_data_teeth,  v,  pt2_data_I(i) )
+          call pt2_add ( pt2_data_S(p),  1.d0,  pt2_data_teeth )
+          call pt2_add2( pt2_data_S2(p), 1.d0,  pt2_data_teeth )
+        enddo
+        call pt2_dealloc(pt2_data_teeth)
+
+        avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)
+        avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
+        avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
+        if ((avg /= 0.d0) .or. (n == N_det_generators) ) then
+          do_exit = .true.
+        endif
+        if (qp_stop()) then
+          stop_now = .True.
+        endif
+        pt2_data % pt2(pt2_stoch_istate) = avg
+        pt2_data % variance(pt2_stoch_istate) = avg2
+        pt2_data % overlap(:,pt2_stoch_istate) = avg3(:)
+        call wall_time(time)
+        ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
+        if(c > 2) then
+          eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % pt2(pt2_stoch_istate) = eqt
+
+          eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % variance(pt2_stoch_istate) = eqt
+
+          eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
+          pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)
+
+
+          if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
+            time1 = time
+            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, &
+              pt2_data     % pt2(pt2_stoch_istate) +E, &
+              pt2_data_err % pt2(pt2_stoch_istate), &
+              pt2_data     % variance(pt2_stoch_istate), &
+              pt2_data_err % variance(pt2_stoch_istate), &
+              pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              time-time0
+            if (stop_now .or. (                                      &
+                  (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+                  (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
+              if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                call sleep(10)
+                if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                  print *, irp_here, ': Error in sending abort signal (2)'
+                endif
+              endif
+            endif
+          endif
+        endif
+      end if
+      n += 1
+    else if(more == 0) then
+      exit
+    else
+      call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2)
+      if(n_tasks > pt2_n_tasks_max)then
+       print*,'PB !!!'
+       print*,'If you see this, send a bug report with the following content'
+       print*,irp_here
+       print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max
+       stop -1
+      endif
+      if (zmq_delete_tasks_async_send(zmq_to_qp_run_socket,task_id,n_tasks,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (send)'
+      endif
+      do i=1,n_tasks
+        if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then
+         print*,'PB !!!'
+         print*,'If you see this, send a bug report with the following content'
+         print*,irp_here
+         print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
+         stop -1
+        endif
+        call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i))
+        f(index(i)) -= 1
+      end do
+      do i=1, b2%cur
+        ! We assume the pulled buffer is sorted
+        if (b2%val(i) > b%mini) exit
+        call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      end do
+      if (zmq_delete_tasks_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (recv)'
+      endif
+    end if
+  end do
+  do i=1,N_det_generators
+    call pt2_dealloc(pt2_data_I(i))
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_dealloc(pt2_data_S(i))
+    call pt2_dealloc(pt2_data_S2(i))
+  enddo
+  do i=1,pt2_n_tasks_max
+    call pt2_dealloc(pt2_data_task(i))
+  enddo
+!print *,  'deleting b2'
+  call delete_selection_buffer(b2)
+!print *,  'sorting b'
+  call sort_selection_buffer(b)
+!print *,  'done'
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+
+end subroutine
+
+
+integer function pt2_find_sample(v, w)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, external :: pt2_find_sample_lr
+
+  pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators)
+end function
+
+
+integer function pt2_find_sample_lr(v, w, l_in, r_in)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, intent(in) :: l_in,r_in
+  integer :: i,l,r
+
+  l=l_in
+  r=r_in
+
+  do while(r-l > 1)
+    i = shiftr(r+l,1)
+    if(w(i) < v) then
+      l = i
+    else
+      r = i
+    end if
+  end do
+  i = r
+  do r=i+1,N_det_generators
+    if (w(r) /= w(i)) then
+      exit
+    endif
+  enddo
+  pt2_find_sample_lr = r-1
+end function
+
+
+BEGIN_PROVIDER [ integer, pt2_n_tasks ]
+ implicit none
+ BEGIN_DOC
+ ! Number of parallel tasks for the Monte Carlo
+ END_DOC
+ pt2_n_tasks = N_det_generators
+END_PROVIDER
+
+BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
+  implicit none
+  integer, allocatable :: seed(:)
+  integer :: m,i
+  call random_seed(size=m)
+  allocate(seed(m))
+  do i=1,m
+    seed(i) = i
+  enddo
+  call random_seed(put=seed)
+  deallocate(seed)
+
+  call RANDOM_NUMBER(pt2_u)
+ END_PROVIDER
+
+ BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
+&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
+  implicit none
+  BEGIN_DOC
+! pt2_J contains the list of generators after ordering them according to the
+! Monte Carlo sampling.
+!
+! pt2_R(i) is the number of combs drawn when determinant i is computed.
+  END_DOC
+  integer                :: N_c, N_j
+  integer                :: U, t, i
+  double precision       :: v
+  integer, external      :: pt2_find_sample_lr
+
+  logical, allocatable :: pt2_d(:)
+  integer :: m,l,r,k
+  integer :: ncache
+  integer, allocatable :: ii(:,:)
+  double precision :: dt
+
+  ncache = min(N_det_generators,10000)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+
+  allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators))
+
+  pt2_R(:) = 0
+  pt2_d(:) = .false.
+  N_c = 0
+  N_j = pt2_n_0(1)
+  do i=1,N_j
+      pt2_d(i) = .true.
+      pt2_J(i) = i
+  end do
+
+  U = 0
+  do while(N_j < pt2_n_tasks)
+
+    if (N_c+ncache > N_det_generators) then
+      ncache = N_det_generators - N_c
+    endif
+
+    !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k)
+    do k=1, ncache
+      dt = pt2_u_0
+      do t=1, pt2_N_teeth
+        v = dt + pt2_W_T *pt2_u(N_c+k)
+        dt = dt + pt2_W_T
+        ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1))
+      end do
+    enddo
+    !$OMP END PARALLEL DO
+
+    do k=1,ncache
+      !ADD_COMB
+      N_c = N_c+1
+      do t=1, pt2_N_teeth
+        i = ii(t,k)
+        if(.not. pt2_d(i)) then
+          N_j += 1
+          pt2_J(N_j) = i
+          pt2_d(i) = .true.
+        end if
+      end do
+
+      pt2_R(N_j) = N_c
+
+      !FILL_TOOTH
+      do while(U < N_det_generators)
+        U += 1
+        if(.not. pt2_d(U)) then
+          N_j += 1
+          pt2_J(N_j) = U
+          pt2_d(U) = .true.
+          exit
+        end if
+      end do
+      if (N_j >= pt2_n_tasks) exit
+    end do
+  enddo
+
+  if(N_det_generators > 1) then
+    pt2_R(N_det_generators-1) = 0
+    pt2_R(N_det_generators) = N_c
+  end if
+
+  deallocate(ii,pt2_d)
+
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_W_T ]
+&BEGIN_PROVIDER [ double precision, pt2_u_0 ]
+&BEGIN_PROVIDER [ integer,          pt2_n_0, (pt2_N_teeth+1) ]
+   implicit none
+   integer                        :: i, t
+   double precision, allocatable  :: tilde_w(:), tilde_cW(:)
+   double precision               :: r, tooth_width
+   integer, external              :: pt2_find_sample
+
+   double precision               :: rss
+   double precision, external     :: memory_of_double, memory_of_int
+   rss = memory_of_double(2*N_det_generators+1)
+   call check_mem(rss,irp_here)
+
+   if (N_det_generators == 1) then
+
+     pt2_w(1)   = 1.d0
+     pt2_cw(1)  = 1.d0
+     pt2_u_0    = 1.d0
+     pt2_W_T    = 0.d0
+     pt2_n_0(1) = 0
+     pt2_n_0(2) = 1
+
+   else
+
+     allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+
+     tilde_cW(0) = 0d0
+
+     do i=1,N_det_generators
+       tilde_w(i)  = psi_coef_sorted_tc_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+     enddo
+
+     double precision               :: norm2
+     norm2 = 0.d0
+     do i=N_det_generators,1,-1
+       norm2 += tilde_w(i)
+     enddo
+
+     tilde_w(:) = tilde_w(:) / norm2
+
+     tilde_cW(0) = -1.d0
+     do i=1,N_det_generators
+       tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+     enddo
+     tilde_cW(:) = tilde_cW(:) + 1.d0
+
+     pt2_n_0(1) = 0
+     do
+     pt2_u_0 = tilde_cW(pt2_n_0(1))
+     r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
+     pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)
+     if(pt2_W_T >= r - pt2_u_0) then
+       exit
+     end if
+     pt2_n_0(1) += 1
+     if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then
+       print *, "teeth building failed"
+       stop -1
+     end if
+   end do
+
+   do t=2, pt2_N_teeth
+     r = pt2_u_0 + pt2_W_T * dble(t-1)
+     pt2_n_0(t) = pt2_find_sample(r, tilde_cW)
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators
+
+   pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1))
+   do t=1, pt2_N_teeth
+     tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
+     if (tooth_width == 0.d0) then
+       tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1)))
+     endif
+     ASSERT(tooth_width > 0.d0)
+     do i=pt2_n_0(t)+1, pt2_n_0(t+1)
+       pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
+     end do
+   end do
+
+   pt2_cW(0) = 0d0
+   do i=1,N_det_generators
+     pt2_cW(i) = pt2_cW(i-1) + pt2_w(i)
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators
+
+ endif
+END_PROVIDER
+
+
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/pt2_type.irp.f b/src/cipsi_tc_bi_ortho/pt2_type.irp.f
new file mode 100644
index 00000000..ee90d421
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/pt2_type.irp.f
@@ -0,0 +1,128 @@
+subroutine pt2_alloc(pt2_data,N)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: N
+  integer :: k
+
+  allocate(pt2_data % pt2(N)           &
+          ,pt2_data % variance(N)      &
+          ,pt2_data % rpt2(N)          &
+          ,pt2_data % overlap(N,N)     &
+          )
+
+  pt2_data % pt2(:)           = 0.d0
+  pt2_data % variance(:)      = 0.d0
+  pt2_data % rpt2(:)          = 0.d0
+  pt2_data % overlap(:,:)     = 0.d0
+
+end subroutine
+
+subroutine pt2_dealloc(pt2_data)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  deallocate(pt2_data % pt2         &
+            ,pt2_data % variance    &
+            ,pt2_data % rpt2        &
+            ,pt2_data % overlap     &
+            )
+end subroutine
+
+subroutine pt2_add(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)            = p1 % pt2(:)           + p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + p2 % overlap(:,:)
+
+  else
+
+    p1 % pt2(:)            = p1 % pt2(:)           + w * p2 % pt2(:)
+    p1 % rpt2(:)           = p1 % rpt2(:)          + w * p2 % rpt2(:)
+    p1 % variance(:)       = p1 % variance(:)      + w * p2 % variance(:)
+    p1 % overlap(:,:)      = p1 % overlap(:,:)     + w * p2 % overlap(:,:)
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_add2(p1, w, p2)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! p1 += w * p2**2
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w == 1.d0) then
+
+    p1 % pt2(:)           = p1 % pt2(:)           + p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + p2 % overlap(:,:)     * p2 % overlap(:,:)
+
+  else
+
+    p1 % pt2(:)           = p1 % pt2(:)           + w * p2 % pt2(:)           * p2 % pt2(:)
+    p1 % rpt2(:)          = p1 % rpt2(:)          + w * p2 % rpt2(:)          * p2 % rpt2(:)
+    p1 % variance(:)      = p1 % variance(:)      + w * p2 % variance(:)      * p2 % variance(:)
+    p1 % overlap(:,:)     = p1 % overlap(:,:)     + w * p2 % overlap(:,:)     * p2 % overlap(:,:)
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_serialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(in)    :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(out) :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  x(1:n)           =  pt2_data % pt2(1:n)
+  k=n
+  x(k+1:k+n)     =  pt2_data % rpt2(1:n)
+  k=k+n
+  x(k+1:k+n)     =  pt2_data % variance(1:n)
+  k=k+n
+  x(k+1:k+n2)  =  reshape(pt2_data % overlap(1:n,1:n), (/ n2 /))
+
+end
+
+subroutine pt2_deserialize(pt2_data, n, x)
+  implicit none
+  use selection_types
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: x(*)
+
+  integer :: i,k,n2
+
+  n2 = n*n
+  pt2_data % pt2(1:n)           =   x(1:n)
+  k=n
+  pt2_data % rpt2(1:n)          =   x(k+1:k+n)
+  k=k+n
+  pt2_data % variance(1:n)      =   x(k+1:k+n)
+  k=k+n
+  pt2_data % overlap(1:n,1:n) = reshape(x(k+1:k+n2), (/ n, n /))
+
+end
diff --git a/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
new file mode 100644
index 00000000..aa6546e7
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
@@ -0,0 +1,549 @@
+ use omp_lib
+ use selection_types
+ use f77_zmq
+BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! Global buffer for the OpenMP selection
+ END_DOC
+ call omp_init_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! Global buffer for the OpenMP selection
+ END_DOC
+ call omp_set_lock(global_selection_buffer_lock)
+ call delete_selection_buffer(global_selection_buffer)
+ call create_selection_buffer(N_det_generators, 2*N_det_generators, &
+    global_selection_buffer)
+ call omp_unset_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+
+subroutine run_pt2_slave(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  if (N_det > 100000 ) then
+    call run_pt2_slave_large(thread,iproc,energy)
+  else
+    call run_pt2_slave_small(thread,iproc,energy)
+  endif
+end
+
+subroutine run_pt2_slave_small(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512), allocatable   :: task(:)
+  integer, allocatable           :: task_id(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b
+  logical :: done, buffer_ready
+
+  type(pt2_type), allocatable :: pt2_data(:)
+  integer :: n_tasks, k, N
+  integer, allocatable :: i_generator(:), subset(:)
+
+  double precision, external :: memory_of_double, memory_of_int
+  integer :: bsize ! Size of selection buffers
+
+  allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0
+  buffer_ready = .False.
+  n_tasks = 1
+
+  done = .False.
+  do while (.not.done)
+
+    n_tasks = max(1,n_tasks)
+    n_tasks = min(pt2_n_tasks_max,n_tasks)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(n_tasks) == 0
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    do k=1,n_tasks
+      call sscanf_ddd(task(k), subset(k), i_generator(k), N)
+    enddo
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    double precision :: time0, time1
+    call wall_time(time0)
+    do k=1,n_tasks
+      call pt2_alloc(pt2_data(k),N_states)
+      b%cur = 0
+      call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
+    enddo
+    call wall_time(time1)
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.
+    endif
+    call sort_selection_buffer(b)
+    call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks)
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
+    b%cur=0
+
+!    ! Try to adjust n_tasks around nproc/2 seconds per job
+    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))
+    n_tasks = min(n_tasks, pt2_n_tasks_max)
+!    n_tasks = 1
+  end do
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
+    call usleep(500)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  deallocate(pt2_data)
+end subroutine
+
+
+subroutine run_pt2_slave_large(thread,iproc,energy)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512)                :: task
+  integer                        :: task_id(1)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b
+  logical :: done, buffer_ready
+
+  type(pt2_type) :: pt2_data
+  integer :: n_tasks, k, N
+  integer :: i_generator, subset
+
+  integer :: bsize ! Size of selection buffers
+  logical :: sending
+  double precision :: time_shift
+
+  PROVIDE global_selection_buffer global_selection_buffer_lock
+
+  call random_number(time_shift)
+  time_shift = time_shift*15.d0
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0
+  buffer_ready = .False.
+  n_tasks = 1
+
+  sending = .False.
+  done = .False.
+  double precision :: time0, time1
+  call wall_time(time0)
+  time0 = time0+time_shift
+  do while (.not.done)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(1) == 0
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    call sscanf_ddd(task, subset, i_generator, N)
+    if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then
+      print *,  irp_here
+      stop 'bug in selection'
+    endif
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    call pt2_alloc(pt2_data,N_states)
+    b%cur = 0
+    call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.
+    endif
+    call sort_selection_buffer(b)
+
+    call wall_time(time1)
+!    if (time1-time0 > 15.d0) then
+      call omp_set_lock(global_selection_buffer_lock)
+      global_selection_buffer%mini = b%mini
+      call merge_selection_buffers(b,global_selection_buffer)
+      b%cur=0
+      call omp_unset_lock(global_selection_buffer_lock)
+      call wall_time(time0)
+!    endif
+
+    call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
+    if ( iproc == 1 .or. i_generator < 100 .or. done) then
+      call omp_set_lock(global_selection_buffer_lock)
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
+      global_selection_buffer%cur = 0
+      call omp_unset_lock(global_selection_buffer_lock)
+    else
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending)
+    endif
+
+    call pt2_dealloc(pt2_data)
+  end do
+  call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
+    call sleep(1)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  FREE global_selection_buffer
+end subroutine
+
+
+subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+
+  logical :: sending
+  sending = .False.
+  call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
+  call push_pt2_results_async_recv(zmq_socket_push, b%mini, sending)
+end subroutine
+
+
+subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+  logical, intent(inout) :: sending
+  integer :: rc, i
+  integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  if (sending) then
+    print *,  irp_here, ': sending is true'
+    stop -1
+  endif
+  sending = .True.
+
+  rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 1
+    return
+  else if(rc /= 4) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 2
+    return
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  do i=1,n_tasks
+    call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  deallocate(pt2_serialized)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+    return
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 6
+    return
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  if (b%cur == 0) then
+
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, 0)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+      return
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+  else
+
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+      return
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+
+    rc8 = f77_zmq_send8( zmq_socket_push, b%val, 8_8*int(b%cur,8), ZMQ_SNDMORE)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 8
+      return
+    else if(rc8 /= 8_8*int(b%cur,8)) then
+      stop 'push'
+    endif
+
+
+    rc8 = f77_zmq_send8( zmq_socket_push, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 9
+      return
+    else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then
+      stop 'push'
+    endif
+
+  endif
+
+end subroutine
+
+subroutine push_pt2_results_async_recv(zmq_socket_push,mini,sending)
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)    :: zmq_socket_push
+  double precision, intent(out) :: mini
+  logical, intent(inout) :: sending
+  integer :: rc
+
+  if (.not.sending) return
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 10
+    return
+  else if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+  rc = f77_zmq_recv( zmq_socket_push, mini, 8, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 11
+    return
+  else if (rc /= 8) then
+    print *,  irp_here//': error in receiving mini'
+    stop 12
+  endif
+IRP_ENDIF
+  sending = .False.
+end subroutine
+
+
+
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b)
+ use selection_types
+ use f77_zmq
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout)  :: pt2_data(*)
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(out) :: index(*)
+  integer, intent(out) :: n_tasks, task_id(*)
+  integer :: rc, rn, i
+  integer*8 :: rc8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized)*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
+  endif
+
+  do i=1,n_tasks
+    call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+  deallocate(pt2_serialized)
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, b%cur, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  if (b%cur > 0) then
+
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%val, 8_8*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= 8_8*int(b%cur,8)) then
+      stop 'pull'
+    endif
+
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%det, int(bit_kind*N_int*2,8)*int(b%cur,8), 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= int(N_int*2*8,8)*int(b%cur,8)) then
+      stop 'pull'
+    endif
+
+  endif
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, ZMQ_SNDMORE)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+  rc = f77_zmq_send( zmq_socket_pull, b%mini, 8, 0)
+IRP_ENDIF
+
+end subroutine
+
diff --git a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
new file mode 100644
index 00000000..e6b016fa
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
@@ -0,0 +1,255 @@
+subroutine run_selection_slave(thread, iproc, energy)
+
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  double precision, intent(in) :: energy(N_states)
+  integer,          intent(in) :: thread, iproc
+
+  integer                      :: rc, i
+  integer                      :: worker_id, task_id(1), ctask, ltask
+  character*(512)              :: task
+  integer(ZMQ_PTR)             :: zmq_to_qp_run_socket
+  integer(ZMQ_PTR)             :: zmq_socket_push
+  integer(ZMQ_PTR), external   :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR), external   :: new_zmq_push_socket
+  type(selection_buffer)       :: buf, buf2
+  type(pt2_type)               :: pt2_data
+  logical                      :: done, buffer_ready
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym
+  PROVIDE psi_selectors_coef_transp psi_det_sorted_tc weight_selection
+
+  call pt2_alloc(pt2_data,N_states)
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  buf%N = 0
+  buffer_ready = .False.
+  ctask = 1
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then
+      exit
+    endif
+    done = task_id(ctask) == 0
+    if (done) then
+      ctask = ctask - 1
+    else
+      integer :: i_generator, N, subset, bsize
+      call sscanf_ddd(task, subset, i_generator, N)
+      if(buf%N == 0) then
+        ! Only first time
+        call create_selection_buffer(N, N*2, buf)
+        buffer_ready = .True.
+      else
+        if (N /= buf%N) then
+          print *, 'N=', N
+          print *, 'buf%N=', buf%N
+          print *, 'bug in ', irp_here
+          stop '-1'
+        end if
+      end if
+      call select_connected(i_generator, energy, pt2_data, buf,subset, pt2_F(i_generator))
+    endif
+
+    integer, external :: task_done_to_taskserver
+
+    if(done .or. ctask == size(task_id)) then
+      do i=1, ctask
+         if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+           call usleep(100)
+          if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+            ctask = 0
+            done = .true.
+            exit
+          endif
+         endif
+      end do
+      if(ctask > 0) then
+        call sort_selection_buffer(buf)
+!        call merge_selection_buffers(buf,buf2)
+        call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+        call pt2_dealloc(pt2_data)
+        call pt2_alloc(pt2_data,N_states)
+!        buf%mini = buf2%mini
+        buf%cur = 0
+      end if
+      ctask = 0
+    end if
+
+    if(done) exit
+    ctask = ctask + 1
+  end do
+
+  if(ctask > 0) then
+    call sort_selection_buffer(buf)
+!    call merge_selection_buffers(buf,buf2)
+    call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+!    buf%mini = buf2%mini
+    buf%cur = 0
+  end if
+  ctask = 0
+  call pt2_dealloc(pt2_data)
+
+  integer, external :: disconnect_from_taskserver
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
+    continue
+  endif
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  if (buffer_ready) then
+    call delete_selection_buffer(buf)
+!    call delete_selection_buffer(buf2)
+  endif
+end subroutine
+
+
+subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in) :: ntasks, task_id(*)
+  integer :: rc
+  double precision, allocatable :: pt2_serialized(:)
+
+  rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  call pt2_serialize(pt2_data,N_states,pt2_serialized)
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+    return
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
+  endif
+  deallocate(pt2_serialized)
+
+  if (b%cur > 0) then
+
+      rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)
+      if(rc /= 8*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)
+      if(rc /= bit_kind*N_int*2*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)'
+      endif
+
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)'
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout) :: pt2_data
+  double precision, intent(out) :: val(*)
+  integer(bit_kind), intent(out) :: det(N_int, 2, *)
+  integer, intent(out) :: N, ntasks, task_id(*)
+  integer :: rc, rn, i
+  double precision, allocatable :: pt2_serialized(:)
+
+  rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, N, 4, 0)'
+  endif
+
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0)
+  if (rc == -1) then
+    ntasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
+  endif
+
+  call pt2_deserialize(pt2_data,N_states,pt2_serialized)
+  deallocate(pt2_serialized)
+
+  if (N>0) then
+      rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)
+      if(rc /= 8*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)
+      if(rc /= bit_kind*N_int*2*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)'
+      endif
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+IRP_ENDIF
+end subroutine
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/selection.irp.f b/src/cipsi_tc_bi_ortho/selection.irp.f
new file mode 100644
index 00000000..6b93f663
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
@@ -0,0 +1,1029 @@
+use bitmasks
+
+! ---
+
+subroutine select_connected(i_generator, E0, pt2_data, b, subset, csubset)
+
+  use bitmasks
+  use selection_types
+
+  implicit none
+  integer,                intent(in)    :: i_generator, subset, csubset
+  double precision,       intent(in)    :: E0(N_states)
+  type(selection_buffer), intent(inout) :: b
+  type(pt2_type),         intent(inout) :: pt2_data
+
+  integer                               :: k, l
+  integer(bit_kind)                     :: hole_mask(N_int,2), particle_mask(N_int,2)
+  double precision, allocatable         :: fock_diag_tmp(:,:)
+
+  allocate(fock_diag_tmp(2,mo_num+1))
+
+  call build_fock_tmp(fock_diag_tmp, psi_det_generators(1,1,i_generator), N_int)
+
+  do k = 1, N_int
+      hole_mask(k,1)     = iand(generators_bitmask(k,1,s_hole), psi_det_generators(k,1,i_generator))
+      hole_mask(k,2)     = iand(generators_bitmask(k,2,s_hole), psi_det_generators(k,2,i_generator))
+      particle_mask(k,1) = iand(generators_bitmask(k,1,s_part), not(psi_det_generators(k,1,i_generator)) )
+      particle_mask(k,2) = iand(generators_bitmask(k,2,s_part), not(psi_det_generators(k,2,i_generator)) )
+  enddo
+  call select_singles_and_doubles(i_generator, hole_mask, particle_mask, fock_diag_tmp, E0, pt2_data, b, subset, csubset)
+
+  deallocate(fock_diag_tmp)
+
+end subroutine select_connected
+
+! ---
+
+subroutine select_singles_and_doubles(i_generator, hole_mask,particle_mask, fock_diag_tmp, E0, pt2_data, buf, subset, csubset)
+
+  BEGIN_DOC
+  !  WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted_tc
+  END_DOC
+
+  use bitmasks
+  use selection_types
+  implicit none
+
+  integer,           intent(in)         :: i_generator, subset, csubset
+  integer(bit_kind), intent(in)         :: hole_mask(N_int,2), particle_mask(N_int,2)
+  double precision,  intent(in)         :: fock_diag_tmp(mo_num)
+  double precision,  intent(in)         :: E0(N_states)
+  type(pt2_type),         intent(inout) :: pt2_data
+  type(selection_buffer), intent(inout) :: buf
+
+  double precision, parameter           :: norm_thr = 1.d-16
+
+  integer                               :: h1, h2, s1, s2, s3, i1, i2, ib, sp, k, i, j, nt, ii, sze
+  integer                               :: maskInd
+  integer                               :: N_holes(2), N_particles(2)
+  integer                               :: hole_list(N_int*bit_kind_size,2)
+  integer                               :: particle_list(N_int*bit_kind_size,2)
+  integer                               :: l_a, nmax, idx
+  integer                               :: nb_count, maskInd_save
+  integer(bit_kind)                     :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2)
+  integer(bit_kind)                     :: mobMask(N_int, 2), negMask(N_int, 2)
+  logical                               :: fullMatch, ok
+  logical                               :: monoAdo, monoBdo
+  logical                               :: monoBdo_save
+  logical                               :: found
+
+  integer, allocatable                  :: preinteresting(:), prefullinteresting(:)
+  integer, allocatable                  :: interesting(:), fullinteresting(:)
+  integer, allocatable                  :: tmp_array(:)
+  integer, allocatable                  :: indices(:), exc_degree(:), iorder(:)
+  integer(bit_kind), allocatable        :: minilist(:, :, :), fullminilist(:, :, :)
+  logical, allocatable                  :: banned(:,:,:), bannedOrb(:,:)
+  double precision, allocatable         :: coef_fullminilist_rev(:,:)
+  double precision, allocatable         :: mat(:,:,:), mat_p(:,:,:), mat_m(:,:,:)
+
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+  PROVIDE banned_excitation
+
+  monoAdo = .true.
+  monoBdo = .true.
+
+  do k = 1, N_int
+    hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
+    hole    (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2))
+    particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1))
+    particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2))
+  enddo
+
+  call bitstring_to_list_ab(hole    , hole_list    , N_holes    , N_int)
+  call bitstring_to_list_ab(particle, particle_list, N_particles, N_int)
+
+  allocate( indices(N_det), exc_degree( max(N_det_alpha_unique, N_det_beta_unique) ) )
+
+  ! Pre-compute excitation degrees wrt alpha determinants
+  k = 1
+  do i = 1, N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,i), psi_det_generators(1,1,i_generator), exc_degree(i), N_int)
+  enddo
+
+  ! Iterate on 0SD beta, and find alphas 0SDTQ such that exc_degree <= 4
+  do j = 1, N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,j), psi_det_generators(1,2,i_generator), nt, N_int)
+    if (nt > 2) cycle
+    do l_a = psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1
+      i = psi_bilinear_matrix_rows(l_a)
+      if(nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_tc_order(psi_bilinear_matrix_order(l_a))
+        if (psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then
+          indices(k) = idx
+          k = k + 1
+        endif
+      endif
+    enddo
+  enddo
+
+  ! Pre-compute excitation degrees wrt beta determinants
+  do i = 1, N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,i), psi_det_generators(1,2,i_generator), exc_degree(i), N_int)
+  enddo
+
+  ! Iterate on 0S alpha, and find betas TQ such that exc_degree <= 4
+  ! Remove also contributions < 1.d-20)
+  do j = 1, N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,j), psi_det_generators(1,1,i_generator), nt, N_int)
+    if (nt > 1) cycle
+    do l_a = psi_bilinear_matrix_transp_rows_loc(j), psi_bilinear_matrix_transp_rows_loc(j+1)-1
+      i = psi_bilinear_matrix_transp_columns(l_a)
+      if(exc_degree(i) < 3) cycle
+      if(nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_tc_order(                                  &
+            psi_bilinear_matrix_order(                               &
+            psi_bilinear_matrix_transp_order(l_a)))
+        if(psi_average_norm_contrib_sorted_tc(idx) > norm_thr) then
+          indices(k) = idx
+          k = k + 1
+        endif
+      endif
+    enddo
+  enddo
+
+  deallocate(exc_degree)
+  nmax = k - 1
+
+  call isort_noidx(indices,nmax)
+
+  ! Start with 32 elements. Size will double along with the filtering.
+  allocate(preinteresting(0:32), prefullinteresting(0:32), interesting(0:32), fullinteresting(0:32))
+  preinteresting(:) = 0
+  prefullinteresting(:) = 0
+
+  do i = 1, N_int
+    negMask(i,1) = not(psi_det_generators(i,1,i_generator))
+    negMask(i,2) = not(psi_det_generators(i,2,i_generator))
+  enddo
+
+  do k = 1, nmax
+
+    i = indices(k)
+    mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+    do j = 2, N_int
+      mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    enddo
+
+    if(nt <= 4) then
+      if(i <= N_det_selectors) then
+        sze = preinteresting(0)
+        if(sze+1 == size(preinteresting)) then
+          allocate(tmp_array(0:sze))
+          tmp_array(0:sze) = preinteresting(0:sze)
+          deallocate(preinteresting)
+          allocate(preinteresting(0:2*sze))
+          preinteresting(0:sze) = tmp_array(0:sze)
+          deallocate(tmp_array)
+        endif
+        preinteresting(0) = sze+1
+        preinteresting(sze+1) = i
+      elseif(nt <= 2) then
+        sze = prefullinteresting(0)
+        if(sze+1 == size(prefullinteresting)) then
+          allocate (tmp_array(0:sze))
+          tmp_array(0:sze) = prefullinteresting(0:sze)
+          deallocate(prefullinteresting)
+          allocate(prefullinteresting(0:2*sze))
+          prefullinteresting(0:sze) = tmp_array(0:sze)
+          deallocate(tmp_array)
+        endif
+        prefullinteresting(0) = sze+1
+        prefullinteresting(sze+1) = i
+      endif
+    endif
+
+  enddo
+  deallocate(indices)
+
+  allocate( banned(mo_num, mo_num,2), bannedOrb(mo_num, 2) )
+  allocate( mat(N_states, mo_num, mo_num) )
+  allocate( mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num) )
+  maskInd = -1
+
+  do s1 = 1, 2
+    do i1 = N_holes(s1), 1, -1   ! Generate low excitations first
+
+      found = .False.
+      monoBdo_save = monoBdo
+      maskInd_save = maskInd
+      do s2 = s1, 2
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        do i2 = N_holes(s2), ib, -1
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then
+            found = .True.
+          end if
+        enddo
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+
+      if (.not.found) cycle
+      monoBdo = monoBdo_save
+      maskInd = maskInd_save
+
+      h1 = hole_list(i1,s1)
+      call apply_hole(psi_det_generators(1,1,i_generator), s1, h1, pmask, ok, N_int)
+
+      negMask = not(pmask)
+
+      interesting(0) = 0
+      fullinteresting(0) = 0
+
+      do ii = 1, preinteresting(0)
+        i = preinteresting(ii)
+        select case(N_int)
+          case(1)
+            mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+            mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+          case(2)
+            mobMask(1:2,1) = iand(negMask(1:2,1), psi_det_sorted_tc(1:2,1,i))
+            mobMask(1:2,2) = iand(negMask(1:2,2), psi_det_sorted_tc(1:2,2,i))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) +     &
+                popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2))
+          case(3)
+            mobMask(1:3,1) = iand(negMask(1:3,1), psi_det_sorted_tc(1:3,1,i))
+            mobMask(1:3,2) = iand(negMask(1:3,2), psi_det_sorted_tc(1:3,2,i))
+            nt = 0
+            do j = 3, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+          case(4)
+            mobMask(1:4,1) = iand(negMask(1:4,1), psi_det_sorted_tc(1:4,1,i))
+            mobMask(1:4,2) = iand(negMask(1:4,2), psi_det_sorted_tc(1:4,2,i))
+            nt = 0
+            do j = 4, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+          case default
+            mobMask(1:N_int,1) = iand(negMask(1:N_int,1), psi_det_sorted_tc(1:N_int,1,i))
+            mobMask(1:N_int,2) = iand(negMask(1:N_int,2), psi_det_sorted_tc(1:N_int,2,i))
+            nt = 0
+            do j = N_int, 1, -1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            enddo
+        end select
+
+        if(nt <= 4) then
+          sze = interesting(0)
+          if(sze+1 == size(interesting)) then
+            allocate (tmp_array(0:sze))
+            tmp_array(0:sze) = interesting(0:sze)
+            deallocate(interesting)
+            allocate(interesting(0:2*sze))
+            interesting(0:sze) = tmp_array(0:sze)
+            deallocate(tmp_array)
+          endif
+          interesting(0) = sze+1
+          interesting(sze+1) = i
+          if(nt <= 2) then
+            sze = fullinteresting(0)
+            if(sze+1 == size(fullinteresting)) then
+              allocate (tmp_array(0:sze))
+              tmp_array(0:sze) = fullinteresting(0:sze)
+              deallocate(fullinteresting)
+              allocate(fullinteresting(0:2*sze))
+              fullinteresting(0:sze) = tmp_array(0:sze)
+              deallocate(tmp_array)
+            endif
+            fullinteresting(0) = sze+1
+            fullinteresting(sze+1) = i
+          endif
+        endif
+
+      enddo
+
+      do ii = 1, prefullinteresting(0)
+        i = prefullinteresting(ii)
+        nt = 0
+        mobMask(1,1) = iand(negMask(1,1), psi_det_sorted_tc(1,1,i))
+        mobMask(1,2) = iand(negMask(1,2), psi_det_sorted_tc(1,2,i))
+        nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+        if (nt > 2) cycle
+        do j=N_int,2,-1
+          mobMask(j,1) = iand(negMask(j,1), psi_det_sorted_tc(j,1,i))
+          mobMask(j,2) = iand(negMask(j,2), psi_det_sorted_tc(j,2,i))
+          nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+          if (nt > 2) exit
+        end do
+
+        if(nt <= 2) then
+          sze = fullinteresting(0)
+          if (sze+1 == size(fullinteresting)) then
+            allocate (tmp_array(0:sze))
+            tmp_array(0:sze) = fullinteresting(0:sze)
+            deallocate(fullinteresting)
+            allocate(fullinteresting(0:2*sze))
+            fullinteresting(0:sze) = tmp_array(0:sze)
+            deallocate(tmp_array)
+          endif
+          fullinteresting(0) = sze+1
+          fullinteresting(sze+1) = i
+        endif
+      enddo
+
+      allocate( fullminilist (N_int, 2, fullinteresting(0)), &
+                    minilist (N_int, 2,     interesting(0))  )
+
+      do i = 1, fullinteresting(0)
+        do k = 1, N_int
+          fullminilist(k,1,i) = psi_det_sorted_tc(k,1,fullinteresting(i))
+          fullminilist(k,2,i) = psi_det_sorted_tc(k,2,fullinteresting(i))
+        enddo
+      enddo
+
+      do i = 1, interesting(0)
+        do k = 1, N_int
+          minilist(k,1,i) = psi_det_sorted_tc(k,1,interesting(i))
+          minilist(k,2,i) = psi_det_sorted_tc(k,2,interesting(i))
+        enddo
+      enddo
+
+      do s2 = s1, 2
+        sp = s1
+
+        if(s1 /= s2) sp = 3
+
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        monoAdo = .true.
+        do i2 = N_holes(s2), ib, -1   ! Generate low excitations first
+
+          h2 = hole_list(i2,s2)
+          call apply_hole(pmask, s2,h2, mask, ok, N_int)
+          banned(:,:,1) = banned_excitation(:,:)
+          banned(:,:,2) = banned_excitation(:,:)
+          do j = 1, mo_num
+            bannedOrb(j, 1) = .true.
+            bannedOrb(j, 2) = .true.
+          enddo
+          do s3 = 1, 2
+            do i = 1, N_particles(s3)
+              bannedOrb(particle_list(i,s3), s3) = .false.
+            enddo
+          enddo
+          if(s1 /= s2) then
+            if(monoBdo) then
+              bannedOrb(h1,s1) = .false.
+            endif
+            if(monoAdo) then
+              bannedOrb(h2,s2) = .false.
+              monoAdo = .false.
+            endif
+          endif
+
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then
+
+            call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
+            if(fullMatch) cycle
+
+            call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting, mat_p, mat_m)
+
+            call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_p, mat_m)
+          endif
+
+        enddo
+
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+
+      deallocate(fullminilist, minilist)
+
+    enddo
+  enddo
+
+  deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
+  deallocate(banned, bannedOrb,mat)
+  deallocate(mat_p, mat_m)
+
+end subroutine select_singles_and_doubles
+
+! ---
+
+subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
+
+  use bitmasks
+  implicit none
+
+  BEGIN_DOC
+  ! Identify the determinants in det which are in the internal space. These are
+  ! the determinants that can be produced by creating two particles on the mask.
+  END_DOC
+
+  integer, intent(in) :: i_gen, N
+  integer, intent(in) :: interesting(0:N)
+  integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N)
+  logical, intent(inout) :: banned(mo_num, mo_num)
+  logical, intent(out) :: fullMatch
+
+  integer :: i, j, na, nb, list(3)
+  integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2)
+
+  fullMatch = .false.
+
+  do i=1,N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  genl : do i=1, N
+    ! If det(i) can't be generated by the mask, cycle
+    do j=1, N_int
+      if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl
+      if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl
+    end do
+
+    ! If det(i) < det(i_gen), it hs already been considered
+    if(interesting(i) < i_gen) then
+      fullMatch = .true.
+      return
+    end if
+
+    ! Identify the particles
+    do j=1, N_int
+      myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1))
+      myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2))
+    end do
+
+    call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int)
+    call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int)
+    banned(list(1), list(2)) = .true.
+  end do genl
+
+end subroutine spot_isinwf
+
+! ---
+
+subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting, mat_p, mat_m)
+
+  BEGIN_DOC
+  ! Computes the contributions A(r,s) by
+  ! comparing the external determinant to all the internal determinants det(i).
+  ! an applying two particles (r,s) to the mask.
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer, intent(in)             :: sp, i_gen, N_sel
+  integer, intent(in)             :: interesting(0:N_sel)
+  integer(bit_kind),intent(in)    :: mask(N_int, 2), det(N_int, 2, N_sel)
+  logical, intent(inout)          :: bannedOrb(mo_num, 2), banned(mo_num, mo_num, 2)
+  double precision, intent(inout) :: mat(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+
+  integer                         :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt
+  integer(bit_kind)               :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2)
+  integer(bit_kind)               :: phasemask(N_int,2)
+
+
+  PROVIDE psi_selectors_coef_transp psi_det_sorted_tc
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+
+  mat   = 0d0
+  mat_p = 0d0
+  mat_m = 0d0
+
+  do i = 1, N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  do i = 1, N_sel
+    if(interesting(i) < 0) then
+      stop 'prefetch interesting(i) and det(i)'
+    endif
+
+    mobMask(1,1) = iand(negMask(1,1), det(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), det(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+
+    if(nt > 4) cycle
+
+    do j = 2, N_int
+      mobMask(j,1) = iand(negMask(j,1), det(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), det(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    enddo
+
+    if(nt > 4) cycle
+
+    if (interesting(i) == i_gen) then
+      if(sp == 3) then
+        do k = 1, mo_num
+          do j = 1, mo_num
+            banned(j,k,2) = banned(k,j,1)
+          enddo
+        enddo
+      else
+        do k = 1, mo_num
+          do l = k+1, mo_num
+            banned(l,k,1) = banned(k,l,1)
+          enddo
+        enddo
+      endif
+    endif
+
+    if (interesting(i) >= i_gen) then
+
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+
+        call get_d3_h  ( det(1,1,i), bannedOrb, banned, mat         , mask, p, sp, psi_selectors_coef_transp (1, interesting(i)) )
+        call get_d3_htc( det(1,1,i), bannedOrb, banned, mat_m, mat_p, mask, p, sp, psi_selectors_rcoef_bi_orth_transp(1, interesting(i)) &
+                       , psi_selectors_lcoef_bi_orth_transp(1, interesting(i)) )
+
+        !perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
+        !perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
+        !do j=2,N_int
+        !  perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
+        !  perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
+        !end do
+        !call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
+        !call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
+        !call get_mask_phase(psi_det_sorted_tc(1,1,interesting(i)), phasemask,N_int)
+        !if(nt == 4) then
+        !  call get_d2 (det(1,1,i), phasemask, bannedOrb, banned, mat,          mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !  call get_pm2(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !elseif(nt == 3) then
+        !  call get_d1 (det(1,1,i), phasemask, bannedOrb, banned, mat         , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !  call get_pm1(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !else
+        !  call get_d0 (det(1,1,i), phasemask, bannedOrb, banned, mat         , mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !  call get_pm0(det(1,1,i), phasemask, bannedOrb, banned, mat_p, mat_m, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        !endif
+    elseif(nt == 4) then
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d2(banned, p, sp)
+    elseif(nt == 3) then
+        call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+        call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+        call past_d1(bannedOrb, p)
+    endif
+  enddo
+
+end subroutine splash_pq
+
+! ---
+
+subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf, mat_p, mat_m)
+
+  use bitmasks
+  use selection_types
+  implicit none
+
+  integer,          intent(in)          :: i_generator, sp, h1, h2
+  double precision, intent(in)          :: mat(N_states, mo_num, mo_num)
+  double precision, intent(in)          :: mat_p(N_states, mo_num, mo_num), mat_m(N_states, mo_num, mo_num)
+  logical,          intent(in)          :: bannedOrb(mo_num, 2), banned(mo_num, mo_num)
+  double precision, intent(in)          :: fock_diag_tmp(mo_num)
+  double precision, intent(in)          :: E0(N_states)
+  type(pt2_type),         intent(inout) :: pt2_data
+  type(selection_buffer), intent(inout) :: buf
+
+  integer                               :: iii, s, degree
+  integer                               :: s1, s2, p1, p2, ib, j, istate, jstate
+  integer                               :: info, k , iwork(N_states+1)
+  integer(bit_kind)                     :: occ(N_int,2), n
+  integer(bit_kind)                     :: mask(N_int, 2), det(N_int, 2)
+  logical                               :: do_cycle, ok, do_diag
+  double precision                      :: delta_E, val, Hii, w, tmp, alpha_h_psi
+  double precision                      :: E_shift
+  double precision                      :: i_h_alpha, alpha_h_i, psi_h_alpha
+  double precision                      :: e_pert(N_states), coef(N_states)
+  double precision                      :: s_weight(N_states,N_states)
+  double precision                      :: eigvalues(N_states+1)
+  double precision                      :: work(1+6*(N_states+1)+2*(N_states+1)**2)
+
+  integer,          external            :: number_of_holes, number_of_particles
+  logical,          external            :: is_a_two_holes_two_particles
+  logical,          external            :: is_a_1h1p
+  double precision, external            :: diag_H_mat_elem_fock
+
+
+  PROVIDE dominant_dets_of_cfgs N_dominant_dets_of_cfgs
+
+  do jstate = 1, N_states
+    do istate = 1, N_states
+      s_weight(istate,jstate) = dsqrt(selection_weight(istate)*selection_weight(jstate))
+    enddo
+  enddo
+
+  if(sp == 3) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = sp
+    s2 = sp
+  end if
+  call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int)
+  E_shift = 0.d0
+
+  if (h0_type == 'CFG') then
+    j = det_to_configuration(i_generator)
+    E_shift = psi_det_Hii(i_generator) - psi_configuration_Hii(j)
+  endif
+
+  do p1 = 1, mo_num
+
+    if(bannedOrb(p1, s1)) cycle
+    ib = 1
+    if(sp /= 3) ib = p1+1
+
+    do p2 = ib, mo_num
+
+      if(bannedOrb(p2, s2)) cycle
+      if(banned(p1,p2)) cycle
+
+      ! TODO ??
+      !if(pseudo_sym)then
+      !  if(dabs(mat(1, p1, p2)).lt.thresh_sym)then
+      !    w = 0.d0
+      !  endif
+      !endif
+
+      ! MANU: ERREUR dans les calculs puisque < I | H | J > = 0 
+      ! n'implique pas < I | H_TC | J > = 0 ??
+      !val = maxval(abs(mat(1:N_states, p1, p2)))
+      !if( val == 0d0) cycle
+
+      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+
+      if(do_only_cas) then
+        if( number_of_particles(det) > 0 ) cycle
+        if( number_of_holes(det) > 0 ) cycle
+      endif
+
+      if(do_ddci) then
+        if(is_a_two_holes_two_particles(det)) cycle
+      endif
+
+      if(do_only_1h1p) then
+        if(.not.is_a_1h1p(det)) cycle
+      endif
+
+      if(seniority_max >= 0) then
+        s = 0
+        do k = 1, N_int
+          s = s + popcnt(ieor(det(k,1),det(k,2)))
+        enddo
+        if (s > seniority_max) cycle
+      endif
+
+      if(excitation_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree(HF_bitmask, det(1,1), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif(excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+      if(excitation_alpha_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask, det(1,1), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif (excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree_spin(dominant_dets_of_cfgs(1,1,k), det(1,1), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_alpha_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+      if(excitation_beta_max >= 0) then
+        do_cycle = .True.
+        if(excitation_ref == 1) then
+          call get_excitation_degree_spin(HF_bitmask, det(1,2), degree, N_int)
+          do_cycle = do_cycle .and. (degree > excitation_max)
+        elseif(excitation_ref == 2) then
+          do k = 1, N_dominant_dets_of_cfgs
+            call get_excitation_degree(dominant_dets_of_cfgs(1,2,k), det(1,2), degree, N_int)
+            do_cycle = do_cycle .and. (degree > excitation_beta_max)
+          enddo
+        endif
+        if(do_cycle) cycle
+      endif
+
+
+      w = 0.d0
+
+      e_pert = 0.d0
+      coef = 0.d0
+      do_diag = .False.
+
+      ! psi_det_generators  --> |i> of psi_0
+      ! psi_coef_generators --> c_i of psi_0
+      !
+      ! <alpha|H|psi_0> = \sum_i c_i <alpha|H|i>
+
+        ! -------------------------------------------
+        ! Non hermitian 
+        ! c_alpha = <alpha|H(j)|psi_0>/delta_E(alpha)
+        ! e_alpha = c_alpha * <psi_0|H(j)|alpha>
+        ! <alpha|H|psi_0> and <psi_0|H|alpha>
+        ! <det|H(j)|psi_0> and transpose 
+        ! -------------------------------------------
+
+        istate = 1
+        call htilde_mu_mat_bi_ortho_tot(det, det, N_int, Hii)
+        delta_E = E0(istate) - Hii + E_shift
+        !delta_E = 1.d0
+
+!        call get_excitation_degree( HF_bitmask, det, degree, N_int)
+
+!        psi_h_alpha = mat_m(istate, p1, p2)
+!        alpha_h_psi = mat_p(istate, p1, p2)
+!
+        psi_h_alpha = 0.d0
+        alpha_h_psi = 0.d0
+        do iii = 1, N_det
+          call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
+          call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
+          psi_h_alpha += i_h_alpha * leigvec_tc_bi_orth(iii,1)
+          alpha_h_psi += alpha_h_i * reigvec_tc_bi_orth(iii,1) 
+        enddo
+
+        !if(alpha_h_psi*psi_h_alpha/delta_E.gt.1.d-10)then
+        !  print*, 'E0,Hii,E_shift'
+        !  print*, E0(istate), Hii, E_shift
+        !  print*, psi_h_alpha, alpha_h_psi, delta_E
+        !  print*, psi_h_alpha * alpha_h_psi / delta_E
+        !  !if(Hii .lt. E0(istate)) then
+        !  !  call debug_det(det, N_int)
+        !  !  print*, ' |E0| < |Hii| !!!'
+        !  !  print*, ' E0  = ', E0(istate)
+        !  !  print*, ' Hii = ', Hii
+        !  !endif
+        !endif
+
+        coef(istate)   = alpha_h_psi / delta_E 
+        e_pert(istate) = coef(istate) * psi_h_alpha
+        if(selection_tc     ==  1 )then
+         if(e_pert(istate).lt.0.d0)then
+          e_pert(istate) = 0.d0
+         endif
+        else if(selection_tc == -1)then
+         if(e_pert(istate).gt.0.d0)then
+          e_pert(istate) = 0.d0
+         endif
+        endif
+         
+
+        !if(e_pert(istate) .gt. 1.d-15) then
+        !  print*, 'E0,Hii,E_shift'
+        !  print*, E0(istate), Hii, E_shift
+        !  print*, psi_h_alpha, alpha_h_psi, delta_E
+        !  print*, psi_h_alpha*alpha_h_psi/delta_E
+        !endif
+
+!      elseif(cipsi_tc == "h_tc_2x2") then
+
+
+      do_diag = sum(dabs(coef)) > 0.001d0 .and. N_states > 1
+
+      do istate = 1, N_states
+
+        alpha_h_psi = mat(istate, p1, p2)
+
+        pt2_data % overlap(:,istate) = pt2_data % overlap(:,istate) + coef(:) * coef(istate)
+        pt2_data % variance(istate)  = pt2_data % variance(istate) + dabs(e_pert(istate))
+        pt2_data % pt2(istate)       = pt2_data % pt2(istate)      + e_pert(istate)
+
+        select case (weight_selection)
+          case(5)
+            ! Variance selection
+            if (h0_type == 'CFG') then
+              w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate)) & 
+                / c0_weight(istate)
+            else
+              w = min(w, - alpha_h_psi * alpha_h_psi * s_weight(istate,istate))
+            endif
+          case(6)
+            if (h0_type == 'CFG') then
+              w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate)) &
+                / c0_weight(istate)
+            else
+              w = min(w,- coef(istate) * coef(istate) * s_weight(istate,istate))
+            endif
+          case default
+            ! Energy selection
+            if (h0_type == 'CFG') then
+              !w = min(w, e_pert(istate) * s_weight(istate,istate)) / c0_weight(istate)
+              w = min(w, -dabs(e_pert(istate)) * s_weight(istate,istate)) / c0_weight(istate)
+            else
+              !w = min(w, e_pert(istate) * s_weight(istate,istate))
+              w = min(w, -dabs( e_pert(istate) ) * s_weight(istate,istate))
+            endif
+        endselect
+      enddo
+
+      if(h0_type == 'CFG') then
+        do k = 1, N_int
+          occ(k,1) = ieor(det(k,1), det(k,2))
+          occ(k,2) = iand(det(k,1), det(k,2))
+        enddo
+        call configuration_to_dets_size(occ, n, elec_alpha_num, N_int)
+        n = max(n,1)
+        w *= dsqrt(dble(n))
+      endif
+
+      if(w <= buf%mini) then
+        call add_to_selection_buffer(buf, det, w)
+      endif
+
+    enddo ! end do p2
+  enddo   ! end do p1
+
+end subroutine fill_buffer_double
+
+! ---
+
+subroutine get_mask_phase(det1, pm, Nint)
+
+  use bitmasks
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2)
+  integer(bit_kind), intent(out) :: pm(Nint,2)
+  integer(bit_kind) :: tmp1, tmp2
+  integer :: i
+  tmp1 = 0_8
+  tmp2 = 0_8
+  select case (Nint)
+
+BEGIN_TEMPLATE
+    case ($Nint)
+      do i=1,$Nint
+        pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1))
+        pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32))
+        pm(i,1) = ieor(pm(i,1), tmp1)
+        pm(i,2) = ieor(pm(i,2), tmp2)
+        if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1)
+        if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2)
+      end do
+SUBST [ Nint ]
+1;;
+2;;
+3;;
+4;;
+END_TEMPLATE
+    case default
+      do i=1,Nint
+        pm(i,1) = ieor(det1(i,1), shiftl(det1(i,1), 1))
+        pm(i,2) = ieor(det1(i,2), shiftl(det1(i,2), 1))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 2))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 2))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 4))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 4))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 8))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 8))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 16))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 16))
+        pm(i,1) = ieor(pm(i,1), shiftl(pm(i,1), 32))
+        pm(i,2) = ieor(pm(i,2), shiftl(pm(i,2), 32))
+        pm(i,1) = ieor(pm(i,1), tmp1)
+        pm(i,2) = ieor(pm(i,2), tmp2)
+        if(iand(popcnt(det1(i,1)), 1) == 1) tmp1 = not(tmp1)
+        if(iand(popcnt(det1(i,2)), 1) == 1) tmp2 = not(tmp2)
+      end do
+  end select
+
+end subroutine get_mask_phase
+
+! ---
+
+subroutine past_d1(bannedOrb, p)
+
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: bannedOrb(mo_num, 2)
+  integer, intent(in) :: p(0:4, 2)
+  integer :: i,s
+
+  do s = 1, 2
+    do i = 1, p(0, s)
+      bannedOrb(p(i, s), s) = .true.
+    end do
+  end do
+
+end subroutine past_d1
+
+! ---
+
+subroutine past_d2(banned, p, sp)
+
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: banned(mo_num, mo_num)
+  integer, intent(in) :: p(0:4, 2), sp
+  integer :: i,j
+
+  if(sp == 3) then
+    do j=1,p(0,2)
+      do i=1,p(0,1)
+        banned(p(i,1), p(j,2)) = .true.
+      end do
+    end do
+  else
+    do i=1,p(0, sp)
+      do j=1,i-1
+        banned(p(j,sp), p(i,sp)) = .true.
+        banned(p(i,sp), p(j,sp)) = .true.
+      end do
+    end do
+  end if
+
+end subroutine past_d2
+
+! ---
+
+subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint)
+
+  BEGIN_DOC
+  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  integer, intent(out)           :: list(Nint*bit_kind_size)
+  integer, intent(out)           :: n_elements
+
+  integer                        :: i, ishift
+  integer(bit_kind)              :: l
+
+  n_elements = 0
+  ishift = 2
+  do i=1,Nint
+    l = string(i)
+    do while (l /= 0_bit_kind)
+      n_elements = n_elements+1
+      list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l)
+      l = iand(l,l-1_bit_kind)
+    enddo
+    ishift = ishift + bit_kind_size
+  enddo
+
+end subroutine bitstring_to_list_in_selection
+
+! ---
+
diff --git a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
new file mode 100644
index 00000000..10132086
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
@@ -0,0 +1,416 @@
+
+subroutine create_selection_buffer(N, size_in, res)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Allocates the memory for a selection buffer.
+! The arrays have dimension size_in and the maximum number of elements is N
+  END_DOC
+
+  integer, intent(in) :: N, size_in
+  type(selection_buffer), intent(out) :: res
+
+  integer :: siz
+  siz = max(size_in,1)
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = memory_of_double(siz)*(N_int*2+1)
+  call check_mem(rss,irp_here)
+
+  allocate(res%det(N_int, 2, siz), res%val(siz))
+
+  res%val(:) = 0d0
+  res%det(:,:,:) = 0_8
+  res%N = N
+  res%mini = 0d0
+  res%cur = 0
+end subroutine
+
+subroutine delete_selection_buffer(b)
+  use selection_types
+  implicit none
+  type(selection_buffer), intent(inout) :: b
+  if (associated(b%det)) then
+    deallocate(b%det)
+  endif
+  if (associated(b%val)) then
+    deallocate(b%val)
+  endif
+  NULLIFY(b%det)
+  NULLIFY(b%val)
+  b%cur = 0
+  b%mini = 0.d0
+  b%N = 0
+end
+
+
+subroutine add_to_selection_buffer(b, det, val)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer(bit_kind), intent(in) :: det(N_int, 2)
+  double precision, intent(in) :: val
+  integer :: i
+
+  if(b%N > 0 .and. val <= b%mini) then
+    b%cur += 1
+    b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
+    b%val(b%cur) = val
+    if(b%cur == size(b%val)) then
+      call sort_selection_buffer(b)
+    end if
+  end if
+end subroutine
+
+subroutine merge_selection_buffers(b1, b2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Merges the selection buffers b1 and b2 into b2
+  END_DOC
+  type(selection_buffer), intent(inout) :: b1
+  type(selection_buffer), intent(inout) :: b2
+  integer(bit_kind), pointer     :: detmp(:,:,:)
+  double precision, pointer      :: val(:)
+  integer                        :: i, i1, i2, k, nmwen, sze
+  if (b1%cur == 0) return
+  do while (b1%val(b1%cur) > b2%mini)
+    b1%cur = b1%cur-1
+    if (b1%cur == 0) then
+      return
+    endif
+  enddo
+  nmwen = min(b1%N, b1%cur+b2%cur)
+  double precision :: rss
+  double precision, external :: memory_of_double
+  sze = max(size(b1%val), size(b2%val))
+  rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
+  call check_mem(rss,irp_here)
+  allocate(val(sze), detmp(N_int, 2, sze))
+  i1=1
+  i2=1
+  do i=1,nmwen
+    if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
+      exit
+    else if (i1 > b1%cur) then
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+    else if (i2 > b2%cur) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+    else
+      if (b1%val(i1) <= b2%val(i2)) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+      else
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+      endif
+    endif
+  enddo
+  deallocate(b2%det, b2%val)
+  do i=nmwen+1,b2%N
+    val(i) = 0.d0
+    detmp(1:N_int,1:2,i) = 0_bit_kind
+  enddo
+  b2%det => detmp
+  b2%val => val
+  b2%mini = min(b2%mini,b2%val(b2%N))
+  b2%cur = nmwen
+end
+
+
+subroutine sort_selection_buffer(b)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer, allocatable :: iorder(:)
+  integer(bit_kind), pointer :: detmp(:,:,:)
+  integer :: i, nmwen
+  logical, external :: detEq
+  if (b%N == 0 .or. b%cur == 0) return
+  nmwen = min(b%N, b%cur)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3))
+  call check_mem(rss,irp_here)
+  allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)))
+  do i=1,b%cur
+    iorder(i) = i
+  end do
+  call dsort(b%val, iorder, b%cur)
+  do i=1, nmwen
+    detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
+    detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))
+  end do
+  deallocate(b%det,iorder)
+  b%det => detmp
+  b%mini = min(b%mini,b%val(b%N))
+  b%cur = nmwen
+end subroutine
+
+subroutine make_selection_buffer_s2(b)
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: configuration_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    do k=1,N_int
+      o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i))
+      o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i))
+    enddo
+    iorder(i) = i
+    bit_tmp(i) = configuration_search_key(o(1,1,i),N_int)
+  enddo
+
+  deallocate(b%det)
+
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        val(i) = max(val(i), val(j))
+        duplicate(j) = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  deallocate (b%val)
+  ! Copy filtered result
+  integer :: n_p
+  n_p=0
+  do i=1,n_d
+    if (duplicate(i)) then
+      cycle
+    endif
+    n_p = n_p + 1
+    do k=1,N_int
+      o(k,1,n_p) = tmp_array(k,1,i)
+      o(k,2,n_p) = tmp_array(k,2,i)
+    enddo
+    val(n_p) = val(i)
+  enddo
+
+  ! Sort by importance
+  do i=1,n_p
+    iorder(i) = i
+  end do
+  call dsort(val,iorder,n_p)
+  do i=1,n_p
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+  enddo
+  do i=1,n_p
+    do k=1,N_int
+      o(k,1,i) = tmp_array(k,1,i)
+      o(k,2,i) = tmp_array(k,2,i)
+    enddo
+  enddo
+
+  ! Create determinants
+  n_d = 0
+  do i=1,n_p
+    call configuration_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int)
+    n_d = n_d + sze
+    if (n_d > b%cur) then
+!      if (n_d - b%cur > b%cur - n_d + sze) then
+!        n_d = n_d - sze
+!      endif
+      exit
+    endif
+  enddo
+
+  rss = (4*N_int+2)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(b%det(N_int,2,2*n_d), b%val(2*n_d))
+  k=1
+  do i=1,n_p
+    n=n_d
+    call configuration_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int)
+    call configuration_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int)
+    do j=k,k+n-1
+      b%val(j) = val(i)
+    enddo
+    k = k+n
+    if (k > n_d) exit
+  enddo
+  deallocate(o)
+  b%cur = n_d
+  b%N = n_d
+end
+
+
+
+
+subroutine remove_duplicates_in_selection_buffer(b)
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: det_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  logical                        :: found_duplicates
+  double precision               :: rss
+  double precision, external     :: memory_of_double
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+
+  found_duplicates = .False.
+  allocate(iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    iorder(i) = i
+    bit_tmp(i) = det_search_key(b%det(1,1,i),N_int)
+  enddo
+
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = b%det(k,1,iorder(i))
+      tmp_array(k,2,i) = b%det(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        duplicate(j) = .True.
+        found_duplicates = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  if (found_duplicates) then
+
+    ! Copy filtered result
+    integer :: n_p
+    n_p=0
+    do i=1,n_d
+      if (duplicate(i)) then
+        cycle
+      endif
+      n_p = n_p + 1
+      do k=1,N_int
+        b%det(k,1,n_p) = tmp_array(k,1,i)
+        b%det(k,2,n_p) = tmp_array(k,2,i)
+      enddo
+      val(n_p) = val(i)
+    enddo
+    b%cur=n_p
+    b%N=n_p
+
+  endif
+
+end
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/selection_weight.irp.f b/src/cipsi_tc_bi_ortho/selection_weight.irp.f
new file mode 100644
index 00000000..3c09e59a
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/selection_weight.irp.f
@@ -0,0 +1,134 @@
+BEGIN_PROVIDER [ double precision, pt2_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Weights adjusted along the selection to make the PT2 contributions
+ ! of each state coincide.
+ END_DOC
+ pt2_match_weight(:) = 1.d0
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Weights adjusted along the selection to make the variances
+ ! of each state coincide.
+ END_DOC
+ variance_match_weight(:) = 1.d0
+END_PROVIDER
+
+
+
+subroutine update_pt2_and_variance_weights(pt2_data, N_st)
+  implicit none
+  use selection_types
+  BEGIN_DOC
+! Updates the PT2- and Variance- matching weights.
+  END_DOC
+  integer, intent(in)          :: N_st
+  type(pt2_type), intent(in)   :: pt2_data
+  double precision             :: pt2(N_st)
+  double precision             :: variance(N_st)
+
+  double precision :: avg, element, dt, x
+  integer          :: k
+  pt2(:)      = pt2_data % pt2(:)
+  variance(:) = pt2_data % variance(:)
+
+  avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  dt = 8.d0 !* selection_factor
+  do k=1,N_st
+    element = exp(dt*(pt2(k)/avg - 1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    pt2_match_weight(k) *= element
+  enddo
+
+
+  avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  do k=1,N_st
+    element = exp(dt*(variance(k)/avg -1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    variance_match_weight(k) *= element
+  enddo
+
+  if (N_det < 100) then
+    ! For tiny wave functions, weights are 1.d0
+    pt2_match_weight(:) = 1.d0
+    variance_match_weight(:) = 1.d0
+  endif
+
+  threshold_davidson_pt2 = min(1.d-6, &
+     max(threshold_davidson, 1.e-1 * PT2_relative_error * minval(abs(pt2(1:N_states)))) )
+
+  SOFT_TOUCH pt2_match_weight variance_match_weight threshold_davidson_pt2
+end
+
+
+
+
+BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Weights used in the selection criterion
+   END_DOC
+   select case (weight_selection)
+
+     case (0)
+      print *,  'Using input weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * state_average_weight(1:N_states)
+
+     case (1)
+      print *,  'Using 1/c_max^2 weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (2)
+      print *,  'Using pt2-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (3)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (4)
+      print *,  'Using variance- and pt2-matching weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states))
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (5)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (6)
+      print *,  'Using CI coefficient-based selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (7)
+      print *,  'Input weights multiplied by variance- and pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (8)
+      print *,  'Input weights multiplied by pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (9)
+      print *,  'Input weights multiplied by variance-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+    end select
+     print *, '# Total weight ', real(selection_weight(:),4)
+
+END_PROVIDER
+
diff --git a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
new file mode 100644
index 00000000..c3a49280
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
@@ -0,0 +1,350 @@
+subroutine run_slave_cipsi
+
+  BEGIN_DOC
+  ! Helper program for distributed parallelism
+  END_DOC
+
+  implicit none
+
+  call omp_set_max_active_levels(1)
+  distributed_davidson = .False.
+  read_wf = .False.
+  SOFT_TOUCH read_wf distributed_davidson
+  call provide_everything
+  call switch_qp_run_to_master
+  call run_slave_main
+end
+
+subroutine provide_everything
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+  PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight
+  PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+end
+
+
+subroutine run_slave_main
+
+  use f77_zmq
+
+  implicit none
+  IRP_IF MPI
+    include 'mpif.h'
+  IRP_ENDIF
+
+  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket
+  double precision :: energy(N_states)
+  character*(64) :: states(10)
+  character*(64) :: old_state
+  integer :: rc, i, ierr
+  double precision :: t0, t1
+
+  integer, external              :: zmq_get_dvector, zmq_get_N_det_generators
+  integer, external              :: zmq_get8_dvector
+  integer, external              :: zmq_get_ivector
+  integer, external              :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_psi_bilinear
+  integer, external              :: zmq_get_psi_notouch
+  integer, external              :: zmq_get_N_states_diag
+
+  zmq_context = f77_zmq_ctx_new ()
+  states(1) = 'selection'
+  states(2) = 'davidson'
+  states(3) = 'pt2'
+  old_state = 'Waiting'
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master
+  PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
+  PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+
+  IRP_IF MPI
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  do
+
+    if (mpi_master) then
+      call wait_for_states(states,zmq_state,size(states))
+      if (zmq_state(1:64) == old_state(1:64)) then
+        call usleep(200)
+        cycle
+      else
+        old_state(1:64) = zmq_state(1:64)
+      endif
+      print *,  trim(zmq_state)
+    endif
+
+    IRP_IF MPI_DEBUG
+      print *,  irp_here, mpi_rank
+      call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+    IRP_ENDIF
+    IRP_IF MPI
+      call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        print *,  irp_here, 'error in broadcast of zmq_state'
+      endif
+    IRP_ENDIF
+
+    if(zmq_state(1:7) == 'Stopped') then
+      exit
+    endif
+
+
+    if (zmq_state(1:9) == 'selection') then
+
+      ! Selection
+      ! ---------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight psi_det psi_coef
+
+      if (mpi_master) then
+        print *,  'N_det', N_det
+        print *,  'N_det_generators', N_det_generators
+        print *,  'N_det_selectors', N_det_selectors
+        print *,  'pt2_e0_denominator', pt2_e0_denominator
+        print *,  'pt2_stoch_istate', pt2_stoch_istate
+        print *,  'state_average_weight', state_average_weight
+        print *,  'selection_weight', selection_weight
+      endif
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call run_selection_slave(0,i,energy)
+      !$OMP END PARALLEL
+      print *,  mpi_rank, ': Selection done'
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:8) == 'davidson') then
+
+      ! Davidson
+      ! --------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_states_diag')
+      IRP_ENDIF
+      if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      !---
+      call omp_set_max_active_levels(8)
+      call davidson_slave_tcp(0)
+      call omp_set_max_active_levels(1)
+      print *,  mpi_rank, ': Davidson done'
+      !---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:3) == 'pt2') then
+
+      ! PT2
+      ! ---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_ivector pt2_stoch_istate')
+      IRP_ENDIF
+      if (zmq_get_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight psi_det psi_coef N_det_generators N_det_selectors
+
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      if (.true.) then
+        integer :: nproc_target, ii
+        double precision :: mem_collector, mem, rss
+
+        call resident_memory(rss)
+
+        nproc_target = nthreads_pt2
+        ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+        do
+          mem = rss +                             & !
+                nproc_target * 8.d0 *             & ! bytes
+                ( 0.5d0*pt2_n_tasks_max           & ! task_id
+                + 64.d0*pt2_n_tasks_max           & ! task
+                + 3.d0*pt2_n_tasks_max*N_states   & ! pt2, variance, norm
+                + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+                + 3.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+                + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+                + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                    ! prefullinteresting, fullinteresting
+                + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+                + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+                ) / 1024.d0**3
+
+          if (nproc_target == 0) then
+            call check_mem(mem,irp_here)
+            nproc_target = 1
+            exit
+          endif
+
+          if (mem+rss < qp_max_mem) then
+            exit
+          endif
+
+          nproc_target = nproc_target - 1
+
+        enddo
+        
+        if (N_det > 100000) then
+
+          if (mpi_master) then
+            print *,  'N_det', N_det
+            print *,  'N_det_generators', N_det_generators
+            print *,  'N_det_selectors', N_det_selectors
+            print *,  'pt2_e0_denominator', pt2_e0_denominator
+            print *,  'pt2_stoch_istate', pt2_stoch_istate
+            print *,  'state_average_weight', state_average_weight
+            print *,  'selection_weight', selection_weight
+            print *,  'Number of threads', nproc_target
+          endif
+
+          if (h0_type == 'CFG') then
+            PROVIDE det_to_configuration
+          endif
+
+          PROVIDE global_selection_buffer pt2_N_teeth pt2_F N_det_generators
+          PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+          PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+          PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+          PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc
+          PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+          PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks
+
+          if (mpi_master) then
+            print *,  'Running PT2'
+          endif
+          !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
+          i = omp_get_thread_num()
+          call run_pt2_slave(0,i,pt2_e0_denominator)
+          !$OMP END PARALLEL
+          FREE state_average_weight
+          print *,  mpi_rank, ': PT2 done'
+          print *,  '-------'
+
+        endif
+      endif
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    endif
+
+  end do
+  IRP_IF MPI
+    call MPI_finalize(ierr)
+  IRP_ENDIF
+end
+
+
+
diff --git a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
new file mode 100644
index 00000000..33fe23fc
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
@@ -0,0 +1,147 @@
+subroutine run_stochastic_cipsi
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Selected Full Configuration Interaction with Stochastic selection and PT2.
+  END_DOC
+  integer                        :: i,j,k,ndet
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+  logical                        :: print_pt2
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+  double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:)
+  PROVIDE H_apply_buffer_allocated distributed_davidson 
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+
+  zeros = 0.d0
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+  endif
+
+  allocate(ept2(1000),pt1(1000),extrap_energy(100))
+
+  correlation_energy_ratio = 0.d0
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function 
+    call pt2_dealloc(pt2_data)
+    call pt2_dealloc(pt2_data_err)
+    call pt2_alloc(pt2_data, N_states)
+    call pt2_alloc(pt2_data_err, N_states)
+    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+
+    N_iter += 1
+
+    if (qp_stop()) exit
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf_tc()
+
+    PROVIDE  psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm
+    pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1))
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+    if (qp_stop()) exit
+  enddo
+!  print*,'data to extrapolate '
+!  do i = 2, N_iter
+!   print*,'iteration ',i
+!   print*,'pt1,Ept2',pt1(i),ept2(i)
+!   call get_extrapolated_energy(i-1,ept2(i),pt1(i),extrap_energy(i))
+!   do j = 2, i
+!    print*,'j,e,energy',j,extrap_energy(j)
+!   enddo
+!  enddo
+
+! thresh_it_dav  = 5.d-6
+! soft_touch thresh_it_dav
+
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  if (.not.qp_stop()) then
+!    if (N_det < N_det_max) then
+!     thresh_it_dav  = 5.d-7
+!     soft_touch thresh_it_dav
+!     call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    endif
+!
+!    call pt2_dealloc(pt2_data)
+!    call pt2_dealloc(pt2_data_err)
+!    call pt2_alloc(pt2_data, N_states)
+!    call pt2_alloc(pt2_data_err, N_states)
+!    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
+!    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  endif
+!  call pt2_dealloc(pt2_data)
+!  call pt2_dealloc(pt2_data_err)
+!  call routine_save_right
+
+end
+
diff --git a/src/cipsi_tc_bi_ortho/zmq_selection.irp.f b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
new file mode 100644
index 00000000..dc3e0f27
--- /dev/null
+++ b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
@@ -0,0 +1,235 @@
+subroutine ZMQ_selection(N_in, pt2_data)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket , zmq_socket_pull
+  integer, intent(in)            :: N_in
+  type(selection_buffer)         :: b
+  integer                        :: i, l, N
+  integer, external              :: omp_get_thread_num
+  type(pt2_type), intent(inout)  :: pt2_data
+
+  PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators
+
+  N = max(N_in,1)
+  N = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+  if (.True.) then
+    PROVIDE pt2_e0_denominator nproc
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order selection_weight pseudo_sym
+    PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+    PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+
+    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection')
+
+    integer, external              :: zmq_put_psi
+    integer, external              :: zmq_put_N_det_generators
+    integer, external              :: zmq_put_N_det_selectors
+    integer, external              :: zmq_put_dvector
+
+    if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+      stop 'Unable to put psi on ZMQ server'
+    endif
+    if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_generators on ZMQ server'
+    endif
+    if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_selectors on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+      stop 'Unable to put energy on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+      stop 'Unable to put state_average_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+      stop 'Unable to put selection_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+      stop 'Unable to put threshold_generators on ZMQ server'
+    endif
+    call create_selection_buffer(N, N*2, b)
+  endif
+
+  integer, external :: add_task_to_taskserver
+  character(len=100000)           :: task
+  integer :: j,k,ipos
+  ipos=1
+  task = ' '
+
+ 
+ do i= 1, N_det_generators
+    do j=1,pt2_F(i)
+      write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N
+      ipos += 30
+      if (ipos > 100000-30) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+        ipos=1
+      endif
+    end do
+  enddo
+  if (ipos > 1) then
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+      stop 'Unable to add task to task server'
+    endif
+  endif
+  N = max(N_in,1)
+
+
+  ASSERT (associated(b%det))
+  ASSERT (associated(b%val))
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  integer :: nproc_target
+  if (N_det < 3*nproc) then
+    nproc_target = N_det/4
+  else
+    nproc_target = nproc
+  endif
+  double precision :: mem
+  mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 +  3.d0 + 5.d0) / (1024.d0**3)
+  call write_double(6,mem,'Estimated memory/thread (Gb)')
+  if (qp_max_mem > 0) then
+    nproc_target = max(1,int(dble(qp_max_mem)/(0.1d0 + mem)))
+    nproc_target = min(nproc_target,nproc)
+  endif
+
+  f(:) = 1.d0
+  if (.not.do_pt2) then
+    double precision :: f(N_states), u_dot_u
+    do k=1,min(N_det,N_states)
+     f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+    enddo
+  endif
+
+  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2_data)  PRIVATE(i) NUM_THREADS(nproc_target+1)
+  i = omp_get_thread_num()
+  if (i==0) then
+    call selection_collector(zmq_socket_pull, b, N, pt2_data)
+  else
+    call selection_slave_inproc(i)
+  endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection')
+  if (N_in > 0) then
+    if (s2_eig) then
+      call make_selection_buffer_s2(b)
+    endif
+    call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+  endif
+  call delete_selection_buffer(b)
+
+  do k=1,N_states
+    pt2_data % pt2(k) = pt2_data % pt2(k) * f(k)
+    pt2_data % variance(k) = pt2_data % variance(k) * f(k)
+    do l=1,N_states
+      pt2_data % overlap(k,l) = pt2_data % overlap(k,l) * dsqrt(f(k)*f(l))
+      pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l))
+    enddo
+
+    pt2_data % rpt2(k) =  &
+       pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k))
+  enddo
+
+  pt2_overlap(:,:) = pt2_data % overlap(:,:)
+
+  print *, 'Overlap of perturbed states:'
+  do l=1,N_states
+    print *, pt2_overlap(l,:)
+  enddo
+  print *, '-------'
+  SOFT_TOUCH pt2_overlap
+  call update_pt2_and_variance_weights(pt2_data, N_states)
+
+end subroutine
+
+
+subroutine selection_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  call run_selection_slave(1,i,pt2_e0_denominator)
+end
+
+subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in)            :: N
+  type(pt2_type), intent(inout)  :: pt2_data
+  type(pt2_type)                 :: pt2_data_tmp
+
+  double precision               :: pt2_mwen(N_states)
+  double precision               :: variance_mwen(N_states)
+  double precision               :: norm2_mwen(N_states)
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+
+  integer :: msg_size, rc, more
+  integer :: acc, i, j, robin, ntask
+  double precision, pointer :: val(:)
+  integer(bit_kind), pointer :: det(:,:,:)
+  integer, allocatable :: task_id(:)
+  type(selection_buffer) :: b2
+
+
+
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N, N*2, b2)
+  integer :: k
+  double precision :: rss
+  double precision, external :: memory_of_int
+  rss = memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+  allocate(task_id(N_det_generators))
+  more = 1
+  pt2_data % pt2(:)      = 0d0
+  pt2_data % variance(:) = 0.d0
+  pt2_data % overlap(:,:) = 0.d0
+  call pt2_alloc(pt2_data_tmp,N_states)
+  do while (more == 1)
+    call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
+
+    call pt2_add(pt2_data, 1.d0, pt2_data_tmp)
+    do i=1, b2%cur
+      call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      if (b2%val(i) > b%mini) exit
+    end do
+
+    do i=1, ntask
+      if(task_id(i) == 0) then
+          print *,  "Error in collector"
+      endif
+      integer, external :: zmq_delete_task
+      if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then
+        stop 'Unable to delete task'
+      endif
+    end do
+  end do
+  call pt2_dealloc(pt2_data_tmp)
+
+
+  call delete_selection_buffer(b2)
+  call sort_selection_buffer(b)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+end subroutine
+
diff --git a/src/cosgtos_ao_int/EZFIO.cfg b/src/cosgtos_ao_int/EZFIO.cfg
new file mode 100644
index 00000000..8edeecd0
--- /dev/null
+++ b/src/cosgtos_ao_int/EZFIO.cfg
@@ -0,0 +1,19 @@
+[ao_expoim_cosgtos]
+type: double precision
+doc: imag part for Exponents for each primitive of each cosGTOs |AO|
+size: (ao_basis.ao_num,ao_basis.ao_prim_num_max)
+interface: ezfio, provider
+
+[use_cosgtos]
+type: logical
+doc: If true, use cosgtos for AO integrals
+interface: ezfio,provider,ocaml
+default: False
+
+[ao_integrals_threshold]
+type: Threshold
+doc: If | (pq|rs) | < `ao_integrals_threshold` then (pq|rs) is zero
+interface: ezfio,provider,ocaml
+default: 1.e-15
+ezfio_name: threshold_ao
+
diff --git a/src/cosgtos_ao_int/NEED b/src/cosgtos_ao_int/NEED
new file mode 100644
index 00000000..43553132
--- /dev/null
+++ b/src/cosgtos_ao_int/NEED
@@ -0,0 +1 @@
+ao_basis
diff --git a/src/cosgtos_ao_int/README.rst b/src/cosgtos_ao_int/README.rst
new file mode 100644
index 00000000..01f25d6d
--- /dev/null
+++ b/src/cosgtos_ao_int/README.rst
@@ -0,0 +1,4 @@
+==============
+cosgtos_ao_int
+==============
+
diff --git a/src/cosgtos_ao_int/aos_cosgtos.irp.f b/src/cosgtos_ao_int/aos_cosgtos.irp.f
new file mode 100644
index 00000000..6a4d54fd
--- /dev/null
+++ b/src/cosgtos_ao_int/aos_cosgtos.irp.f
@@ -0,0 +1,210 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ]
+
+  implicit none
+  integer :: i, j
+
+  do j = 1, ao_num
+    do i = 1, ao_prim_num_max
+      ao_coef_norm_ord_transp_cosgtos(i,j) = ao_coef_norm_ord_cosgtos(j,i)
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ complex*16, ao_expo_ord_transp_cosgtos, (ao_prim_num_max, ao_num) ]
+
+  implicit none
+  integer :: i, j
+
+  do j = 1, ao_num
+    do i = 1, ao_prim_num_max
+      ao_expo_ord_transp_cosgtos(i,j) = ao_expo_ord_cosgtos(j,i)
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_coef_norm_cosgtos, (ao_num, ao_prim_num_max) ]
+
+  implicit none
+
+  integer          :: i, j, powA(3), nz
+  double precision :: norm
+  complex*16       :: overlap_x, overlap_y, overlap_z, C_A(3)
+  complex*16       :: integ1, integ2, expo
+
+  nz = 100
+
+  C_A(1) = (0.d0, 0.d0)
+  C_A(2) = (0.d0, 0.d0)
+  C_A(3) = (0.d0, 0.d0)
+
+  ao_coef_norm_cosgtos = 0.d0
+
+  do i = 1, ao_num
+
+    powA(1) = ao_power(i,1)
+    powA(2) = ao_power(i,2)
+    powA(3) = ao_power(i,3)
+
+    ! Normalization of the primitives
+    if(primitives_normalized) then
+
+      do j = 1, ao_prim_num(i)
+
+        expo = ao_expo(i,j) + (0.d0, 1.d0) * ao_expoim_cosgtos(i,j)
+
+        call overlap_cgaussian_xyz(C_A, C_A,        expo, expo, powA, powA, overlap_x, overlap_y, overlap_z, integ1, nz)
+        call overlap_cgaussian_xyz(C_A, C_A, conjg(expo), expo, powA, powA, overlap_x, overlap_y, overlap_z, integ2, nz)
+
+        norm = 2.d0 * real( integ1 + integ2 )
+
+        ao_coef_norm_cosgtos(i,j) = ao_coef(i,j) / dsqrt(norm)
+      enddo
+
+    else
+
+      do j = 1, ao_prim_num(i)
+        ao_coef_norm_cosgtos(i,j) = ao_coef(i,j)
+      enddo
+
+    endif
+
+  enddo
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, ao_coef_norm_ord_cosgtos, (ao_num, ao_prim_num_max) ]
+&BEGIN_PROVIDER [ complex*16      , ao_expo_ord_cosgtos,      (ao_num, ao_prim_num_max) ]
+
+  implicit none
+  integer          :: i, j
+  integer          :: iorder(ao_prim_num_max)
+  double precision :: d(ao_prim_num_max,3)
+
+  d = 0.d0
+
+  do i = 1, ao_num
+
+    do j = 1, ao_prim_num(i)
+      iorder(j) = j
+      d(j,1) = ao_expo(i,j)
+      d(j,2) = ao_coef_norm_cosgtos(i,j)
+      d(j,3) = ao_expoim_cosgtos(i,j)
+    enddo
+
+    call dsort     (d(1,1), iorder, ao_prim_num(i))
+    call dset_order(d(1,2), iorder, ao_prim_num(i))
+    call dset_order(d(1,3), iorder, ao_prim_num(i))
+
+    do j = 1, ao_prim_num(i)
+      ao_expo_ord_cosgtos     (i,j) = d(j,1) + (0.d0, 1.d0) * d(j,3)
+      ao_coef_norm_ord_cosgtos(i,j) = d(j,2)
+    enddo
+
+  enddo
+
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos,   (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_x, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_y, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_overlap_cosgtos_z, (ao_num, ao_num) ]
+
+  implicit none
+  integer          :: i, j, n, l, dim1, power_A(3), power_B(3)
+  double precision :: c, overlap, overlap_x, overlap_y, overlap_z
+  complex*16       :: alpha, beta, A_center(3), B_center(3)
+  complex*16       :: overlap1, overlap_x1, overlap_y1, overlap_z1
+  complex*16       :: overlap2, overlap_x2, overlap_y2, overlap_z2
+
+  ao_overlap_cosgtos   = 0.d0
+  ao_overlap_cosgtos_x = 0.d0
+  ao_overlap_cosgtos_y = 0.d0
+  ao_overlap_cosgtos_z = 0.d0
+
+  dim1 = 100
+
+ !$OMP PARALLEL DO SCHEDULE(GUIDED)                                                                 &
+ !$OMP DEFAULT(NONE)                                                                                &
+ !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, n, l, c                    &
+ !$OMP        , overlap_x , overlap_y , overlap_z , overlap                                         &
+ !$OMP        , overlap_x1, overlap_y1, overlap_z1, overlap1                                        &
+ !$OMP        , overlap_x2, overlap_y2, overlap_z2, overlap2 )                                      &
+ !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1                             &
+ !$OMP       , ao_overlap_cosgtos_x, ao_overlap_cosgtos_y, ao_overlap_cosgtos_z, ao_overlap_cosgtos &
+ !$OMP       , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos )
+
+  do j = 1, ao_num
+
+    A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0) 
+    A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0)
+    A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0)
+    power_A(1)  = ao_power(j,1)
+    power_A(2)  = ao_power(j,2)
+    power_A(3)  = ao_power(j,3)
+
+    do i = 1, ao_num
+
+      B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) 
+      B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0)
+      B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0)
+      power_B(1)  = ao_power(i,1)
+      power_B(2)  = ao_power(i,2)
+      power_B(3)  = ao_power(i,3)
+
+      do n = 1, ao_prim_num(j)
+        alpha = ao_expo_ord_transp_cosgtos(n,j)
+
+        do l = 1, ao_prim_num(i)
+          c    = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i)
+          beta = ao_expo_ord_transp_cosgtos(l,i)
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                    , overlap_x1, overlap_y1, overlap_z1, overlap1, dim1 )
+
+          call overlap_cgaussian_xyz( A_center, B_center, conjg(alpha), beta, power_A, power_B &
+                                    , overlap_x2, overlap_y2, overlap_z2, overlap2, dim1       )
+
+          overlap_x = 2.d0 * real( overlap_x1 + overlap_x2 )
+          overlap_y = 2.d0 * real( overlap_y1 + overlap_y2 )
+          overlap_z = 2.d0 * real( overlap_z1 + overlap_z2 )
+          overlap   = 2.d0 * real( overlap1   + overlap2   )
+
+          ao_overlap_cosgtos(i,j) = ao_overlap_cosgtos(i,j) + c * overlap
+
+          if( isnan(ao_overlap_cosgtos(i,j)) ) then
+            print*,'i, j', i, j
+            print*,'l, n', l, n
+            print*,'c, overlap', c, overlap
+            print*, overlap_x, overlap_y, overlap_z
+            stop
+          endif
+
+          ao_overlap_cosgtos_x(i,j) = ao_overlap_cosgtos_x(i,j) + c * overlap_x
+          ao_overlap_cosgtos_y(i,j) = ao_overlap_cosgtos_y(i,j) + c * overlap_y
+          ao_overlap_cosgtos_z(i,j) = ao_overlap_cosgtos_z(i,j) + c * overlap_z
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
+
+
+
diff --git a/src/cosgtos_ao_int/cosgtos_ao_int.irp.f b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f
new file mode 100644
index 00000000..d65dfba5
--- /dev/null
+++ b/src/cosgtos_ao_int/cosgtos_ao_int.irp.f
@@ -0,0 +1,7 @@
+program cosgtos_ao_int
+  implicit none
+  BEGIN_DOC
+! TODO : Put the documentation of the program here
+  END_DOC
+  print *, 'Hello world'
+end
diff --git a/src/cosgtos_ao_int/expoim_opt.py b/src/cosgtos_ao_int/expoim_opt.py
new file mode 100644
index 00000000..d15b0151
--- /dev/null
+++ b/src/cosgtos_ao_int/expoim_opt.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python
+
+import sys
+import os
+import subprocess
+from datetime import datetime
+import time
+import numpy as np
+from modif_powell_imp import my_fmin_powell
+
+QP_PATH=os.environ["QP_ROOT"]
+sys.path.insert(0, QP_PATH+"external/ezfio/Python")
+from ezfio import ezfio
+
+
+
+
+
+#------------------------------------------------------------------------------
+#
+def get_expoim():
+
+    expo_im = np.array(ezfio.get_cosgtos_ao_int_ao_expoim_cosgtos()).T
+    #print(expo_im.shape)
+
+    x = []
+    for i in range(ao_num):
+        for j in range(ao_prim_num[i]):
+            x.append(expo_im[i,j])
+
+    return x
+
+# ---
+
+def set_expoim(x):
+
+    expo_im = np.zeros((ao_num, ao_prim_num_max))
+
+    ii = 0
+    for i in range(ao_num):
+        for j in range(ao_prim_num[i]):
+            expo_im[i,j] = x[ii]
+            ii = ii + 1
+
+    ezfio.set_cosgtos_ao_int_ao_expoim_cosgtos(expo_im.T)
+#
+#------------------------------------------------------------------------------
+
+
+#------------------------------------------------------------------------------
+#
+def save_res(results, file_output):
+
+    lines = results.splitlines()
+    with open(file_output, "w") as f:
+        for line in lines:
+            f.write(f"{line}\n")
+
+#
+#------------------------------------------------------------------------------
+
+
+
+#------------------------------------------------------------------------------
+#
+def get_scfenergy(results):
+
+    scf_energy = 0.0
+
+    lines = results.splitlines()
+    for line in lines:
+        if("SCF energy" in line):
+            scf_energy = float(line.split()[-1])
+
+    return scf_energy
+#
+#------------------------------------------------------------------------------
+
+
+
+
+#------------------------------------------------------------------------------
+#
+def run_scf():
+
+    return subprocess.check_output( ['qp_run', 'scf', EZFIO_file]
+                                  , encoding = "utf-8" )
+#
+#------------------------------------------------------------------------------
+
+
+
+#------------------------------------------------------------------------------
+#
+def f_scf(x):
+
+    global i_call
+    i_call += 1
+
+    #print(x)
+
+    # set expo
+    set_expoim(x)
+
+    # run scf
+    results = run_scf()
+    #save_res(results, "scf_"+str(i_call))
+    
+
+    # get scf_energy
+    scf_energy = get_scfenergy(results)
+    print( scf_energy )
+    sys.stdout.flush()
+
+    return scf_energy
+#
+#------------------------------------------------------------------------------
+
+
+
+
+if __name__ == '__main__':
+
+    t0 = time.time()
+
+    EZFIO_file = sys.argv[1]
+    ezfio.set_file(EZFIO_file)
+    print(" Today's date:", datetime.now() )
+    print(" EZFIO file = {}".format(EZFIO_file))
+
+
+    ao_num = ezfio.get_ao_basis_ao_num()
+    print(f" ao_num = {ao_num}")
+
+    ao_prim_num = ezfio.get_ao_basis_ao_prim_num()
+
+    ao_prim_num_max = np.amax(ao_prim_num)
+    print(f" ao_prim_num_max = {ao_prim_num_max}")
+
+    ezfio.set_ao_basis_ao_prim_num_max(ao_prim_num_max)
+
+    x = get_expoim()
+
+    n_par = len(x)
+    print(' nb of parameters = {}'.format(n_par))
+
+    sys.stdout.flush()
+
+    #x    = (np.random.rand(n_par) - 0.5) * 1.0
+    x     = [ (+0.00) for _ in range(n_par)]
+
+    x_min = [ (-10.0) for _ in range(n_par)]
+    x_max = [ (+10.0) for _ in range(n_par)]
+
+    i_call = 0
+    memo_val = {'fmin': 100.}
+
+    opt = my_fmin_powell( f_scf
+                        , x, x_min, x_max
+                        #, xtol        = 1e-1
+                        #, ftol        = 1e-1
+                        , maxfev      = 1e8
+                        , full_output = 1
+                        , verbose     = 1 )
+
+
+    print(" x = " + str(opt))
+
+    print(" end after {:.3f} minutes".format((time.time()-t0)/60.) )
+
+    # !!!
+# !!!
diff --git a/src/cosgtos_ao_int/gauss_legendre.irp.f b/src/cosgtos_ao_int/gauss_legendre.irp.f
new file mode 100644
index 00000000..4bdadb6e
--- /dev/null
+++ b/src/cosgtos_ao_int/gauss_legendre.irp.f
@@ -0,0 +1,57 @@
+ BEGIN_PROVIDER [ double precision, gauleg_t2, (n_pt_max_integrals,n_pt_max_integrals/2) ]
+&BEGIN_PROVIDER [ double precision, gauleg_w, (n_pt_max_integrals,n_pt_max_integrals/2) ]
+   implicit none
+   BEGIN_DOC
+   ! t_w(i,1,k) = w(i)
+   ! t_w(i,2,k) = t(i)
+   END_DOC
+   integer                        :: i,j,l
+   l=0
+   do i = 2,n_pt_max_integrals,2
+     l = l+1
+     call gauleg(0.d0,1.d0,gauleg_t2(1,l),gauleg_w(1,l),i)
+     do j=1,i
+       gauleg_t2(j,l) *= gauleg_t2(j,l)
+     enddo
+   enddo
+
+END_PROVIDER
+
+subroutine gauleg(x1,x2,x,w,n)
+   implicit none
+   BEGIN_DOC
+   ! Gauss-Legendre
+   END_DOC
+   integer, intent(in)            :: n
+   double precision, intent(in)   :: x1, x2
+   double precision, intent (out) :: x(n),w(n)
+   double precision, parameter    :: eps=3.d-14
+
+   integer                        :: m,i,j
+   double precision               :: xm, xl, z, z1, p1, p2, p3, pp, dn
+   m=(n+1)/2
+   xm=0.5d0*(x2+x1)
+   xl=0.5d0*(x2-x1)
+   dn = dble(n)
+   do i=1,m
+     z=dcos(3.141592654d0*(dble(i)-.25d0)/(dble(n)+.5d0))
+     z1 = z+1.d0
+     do while (dabs(z-z1) > eps)
+       p1=1.d0
+       p2=0.d0
+       do j=1,n
+         p3=p2
+         p2=p1
+         p1=(dble(j+j-1)*z*p2-dble(j-1)*p3)/j
+       enddo
+       pp=dn*(z*p1-p2)/(z*z-1.d0)
+       z1=z
+       z=z1-p1/pp
+     end do
+     x(i)=xm-xl*z
+     x(n+1-i)=xm+xl*z
+     w(i)=(xl+xl)/((1.d0-z*z)*pp*pp)
+     w(n+1-i)=w(i)
+   enddo
+end
+
diff --git a/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f
new file mode 100644
index 00000000..7f94f226
--- /dev/null
+++ b/src/cosgtos_ao_int/one_e_Coul_integrals.irp.f
@@ -0,0 +1,535 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_integrals_n_e_cosgtos, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  !
+  !  Nucleus-electron interaction, in the cosgtos |AO| basis set.
+  !
+  !  :math:`\langle \chi_i | -\sum_A \frac{1}{|r-R_A|} | \chi_j \rangle`
+  !
+  END_DOC
+
+  implicit none
+  integer          :: num_A, num_B, power_A(3), power_B(3)
+  integer          :: i, j, k, l, n_pt_in, m
+  double precision :: c, Z, A_center(3), B_center(3), C_center(3)
+  complex*16       :: alpha, beta, c1, c2
+
+  complex*16       :: NAI_pol_mult_cosgtos
+
+  ao_integrals_n_e_cosgtos = 0.d0
+
+ !$OMP PARALLEL                                                                           &
+ !$OMP DEFAULT (NONE)                                                                     &
+ !$OMP PRIVATE ( i, j, k, l, m, alpha, beta, A_center, B_center, C_center                 &
+ !$OMP         , power_A, power_B, num_A, num_B, Z, c, c1, c2, n_pt_in )                  &
+ !$OMP SHARED ( ao_num, ao_prim_num, ao_nucl, nucl_coord, ao_power, nucl_num, nucl_charge &
+ !$OMP        , ao_expo_ord_transp_cosgtos, ao_coef_norm_ord_transp_cosgtos               &
+ !$OMP        , n_pt_max_integrals, ao_integrals_n_e_cosgtos )
+
+ n_pt_in = n_pt_max_integrals
+
+ !$OMP DO SCHEDULE (dynamic)
+
+  do j = 1, ao_num
+    num_A         = ao_nucl(j)
+    power_A(1:3)  = ao_power(j,1:3)
+    A_center(1:3) = nucl_coord(num_A,1:3)
+
+    do i = 1, ao_num
+      num_B         = ao_nucl(i)
+      power_B(1:3)  = ao_power(i,1:3)
+      B_center(1:3) = nucl_coord(num_B,1:3)
+
+      do l = 1, ao_prim_num(j)
+        alpha = ao_expo_ord_transp_cosgtos(l,j)
+
+        do m = 1, ao_prim_num(i)
+          beta = ao_expo_ord_transp_cosgtos(m,i)
+
+          c = 0.d0
+          do k = 1, nucl_num
+
+            Z = nucl_charge(k)
+
+            C_center(1:3) = nucl_coord(k,1:3)
+
+            !print *, ' '
+            !print *, A_center, B_center, C_center, power_A, power_B
+            !print *, real(alpha), real(beta)
+
+            c1 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B &
+                                     , alpha, beta, C_center, n_pt_in )
+
+            !c2 = c1
+            c2 = NAI_pol_mult_cosgtos( A_center, B_center, power_A, power_B  &
+                                     , conjg(alpha), beta, C_center, n_pt_in )
+
+            !print *, ' c1 = ', real(c1)
+            !print *, ' c2 = ', real(c2)
+
+            c = c - Z * 2.d0 * real(c1 + c2)
+
+          enddo
+          ao_integrals_n_e_cosgtos(i,j) = ao_integrals_n_e_cosgtos(i,j) &
+                      + ao_coef_norm_ord_transp_cosgtos(l,j)            &
+                      * ao_coef_norm_ord_transp_cosgtos(m,i) * c
+        enddo
+      enddo
+    enddo
+  enddo
+
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+END_PROVIDER
+
+! ---
+
+complex*16 function NAI_pol_mult_cosgtos(A_center, B_center, power_A, power_B, alpha, beta, C_center, n_pt_in)
+
+  BEGIN_DOC
+  !
+  ! Computes the electron-nucleus attraction with two primitves cosgtos.
+  !
+  ! :math:`\langle g_i | \frac{1}{|r-R_c|} | g_j \rangle`
+  !
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,          intent(in) :: n_pt_in, power_A(3), power_B(3)
+  double precision, intent(in) :: C_center(3), A_center(3), B_center(3)
+  complex*16,       intent(in) :: alpha, beta
+
+  integer                      :: i, n_pt, n_pt_out
+  double precision             :: dist, const_mod
+  complex*16                   :: p, p_inv, rho, dist_integral, const, const_factor, coeff, factor
+  complex*16                   :: accu, P_center(3)
+  complex*16                   :: d(0:n_pt_in)
+
+  complex*16                   :: V_n_e_cosgtos
+  complex*16                   :: crint
+
+  if ( (A_center(1)/=B_center(1)) .or. (A_center(2)/=B_center(2)) .or. (A_center(3)/=B_center(3)) .or. &
+       (A_center(1)/=C_center(1)) .or. (A_center(2)/=C_center(2)) .or. (A_center(3)/=C_center(3)) ) then
+
+    continue
+
+  else
+
+    NAI_pol_mult_cosgtos = V_n_e_cosgtos( power_A(1), power_A(2), power_A(3) &
+                                        , power_B(1), power_B(2), power_B(3) &
+                                        , alpha, beta )
+    return
+
+  endif
+
+  p     = alpha + beta
+  p_inv = (1.d0, 0.d0) / p
+  rho   = alpha * beta * p_inv
+
+  dist          = 0.d0
+  dist_integral = (0.d0, 0.d0)
+  do i = 1, 3
+    P_center(i)    = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+    dist          += (A_center(i) - B_center(i)) * (A_center(i) - B_center(i))
+    dist_integral += (P_center(i) - C_center(i)) * (P_center(i) - C_center(i))
+  enddo
+
+  const_factor = dist * rho
+  const        = p * dist_integral
+
+  const_mod = dsqrt(real(const_factor)*real(const_factor) + aimag(const_factor)*aimag(const_factor))
+  if(const_mod > 80.d0) then
+    NAI_pol_mult_cosgtos = (0.d0, 0.d0)
+    return
+  endif
+
+  factor = zexp(-const_factor)
+  coeff  = dtwo_pi * factor * p_inv
+
+  do i = 0, n_pt_in
+    d(i) = (0.d0, 0.d0)
+  enddo
+
+  n_pt = 2 * ( (power_A(1) + power_B(1)) + (power_A(2) + power_B(2)) + (power_A(3) + power_B(3)) )
+  if(n_pt == 0) then
+    NAI_pol_mult_cosgtos = coeff * crint(0, const)
+    return
+  endif
+
+  call give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta &
+                                         , power_A, power_B, C_center, n_pt_in, d, n_pt_out)
+
+  if(n_pt_out < 0) then
+    NAI_pol_mult_cosgtos = (0.d0, 0.d0)
+    return
+  endif
+
+  accu = (0.d0, 0.d0)
+  do i = 0, n_pt_out, 2
+    accu += crint(shiftr(i, 1), const) * d(i)
+
+!    print *, shiftr(i, 1), real(const), real(d(i)), real(crint(shiftr(i, 1), const))
+  enddo
+  NAI_pol_mult_cosgtos = accu * coeff
+
+end function NAI_pol_mult_cosgtos
+
+! ---
+
+subroutine give_cpolynomial_mult_center_one_e( A_center, B_center, alpha, beta &
+                                             , power_A, power_B, C_center, n_pt_in, d, n_pt_out)
+
+  BEGIN_DOC
+  ! Returns the explicit polynomial in terms of the "t" variable of the following
+  !
+  ! $I_{x1}(a_x, d_x,p,q) \times I_{x1}(a_y, d_y,p,q) \times I_{x1}(a_z, d_z,p,q)$.
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in) :: n_pt_in, power_A(3), power_B(3)
+  double precision, intent(in) :: A_center(3), B_center(3), C_center(3)
+  complex*16,       intent(in) :: alpha, beta
+  integer,         intent(out) :: n_pt_out
+  complex*16,      intent(out) :: d(0:n_pt_in)
+
+  integer                      :: a_x, b_x, a_y, b_y, a_z, b_z
+  integer                      :: n_pt1, n_pt2, n_pt3, dim, i, n_pt_tmp
+  complex*16                   :: p, P_center(3), rho, p_inv, p_inv_2
+  complex*16                   :: R1x(0:2), B01(0:2), R1xp(0:2),R2x(0:2)
+  complex*16                   :: d1(0:n_pt_in), d2(0:n_pt_in), d3(0:n_pt_in)
+
+  ASSERT (n_pt_in > 1)
+
+  p       = alpha + beta
+  p_inv   = (1.d0, 0.d0) / p
+  p_inv_2 = 0.5d0 * p_inv
+
+  do i = 1, 3
+    P_center(i) = (alpha * A_center(i) + beta * B_center(i)) * p_inv
+  enddo
+
+  do i = 0, n_pt_in
+    d(i)  = (0.d0, 0.d0)
+    d1(i) = (0.d0, 0.d0)
+    d2(i) = (0.d0, 0.d0)
+    d3(i) = (0.d0, 0.d0)
+  enddo
+
+  ! ---
+
+  n_pt1 = n_pt_in
+
+  R1x(0)  =  (P_center(1) - A_center(1))
+  R1x(1)  =  (0.d0, 0.d0)
+  R1x(2)  = -(P_center(1) - C_center(1))
+
+  R1xp(0) =  (P_center(1) - B_center(1))
+  R1xp(1) =  (0.d0, 0.d0)
+  R1xp(2) = -(P_center(1) - C_center(1))
+
+  R2x(0)  =  p_inv_2
+  R2x(1)  = (0.d0, 0.d0)
+  R2x(2)  = -p_inv_2
+
+  a_x = power_A(1)
+  b_x = power_B(1)
+  call I_x1_pol_mult_one_e_cosgtos(a_x, b_x, R1x, R1xp, R2x, d1, n_pt1, n_pt_in)
+
+  if(n_pt1 < 0) then
+    n_pt_out = -1
+    do i = 0, n_pt_in
+      d(i) = (0.d0, 0.d0)
+    enddo
+    return
+  endif
+
+  ! ---
+
+  n_pt2 = n_pt_in
+
+  R1x(0)  =  (P_center(2) - A_center(2))
+  R1x(1)  =  (0.d0, 0.d0)
+  R1x(2)  = -(P_center(2) - C_center(2))
+
+  R1xp(0) =  (P_center(2) - B_center(2))
+  R1xp(1) =  (0.d0, 0.d0)
+  R1xp(2) = -(P_center(2) - C_center(2))
+
+  a_y = power_A(2)
+  b_y = power_B(2)
+  call I_x1_pol_mult_one_e_cosgtos(a_y, b_y, R1x, R1xp, R2x, d2, n_pt2, n_pt_in)
+
+  if(n_pt2 < 0) then
+    n_pt_out = -1
+    do i = 0, n_pt_in
+      d(i) = (0.d0, 0.d0)
+    enddo
+    return
+  endif
+
+  ! ---
+
+  n_pt3 = n_pt_in
+
+  R1x(0)  =  (P_center(3) - A_center(3))
+  R1x(1)  =  (0.d0, 0.d0)
+  R1x(2)  = -(P_center(3) - C_center(3))
+
+  R1xp(0) =  (P_center(3) - B_center(3))
+  R1xp(1) =  (0.d0, 0.d0)
+  R1xp(2) = -(P_center(3) - C_center(3))
+
+  a_z = power_A(3)
+  b_z = power_B(3)
+  call I_x1_pol_mult_one_e_cosgtos(a_z, b_z, R1x, R1xp, R2x, d3, n_pt3, n_pt_in)
+
+  if(n_pt3 < 0) then
+    n_pt_out = -1
+    do i = 0, n_pt_in
+      d(i) = (0.d0, 0.d0)
+    enddo
+    return
+  endif
+
+  ! ---
+
+  n_pt_tmp = 0
+  call multiply_cpoly(d1, n_pt1, d2, n_pt2, d, n_pt_tmp)
+  do i = 0, n_pt_tmp
+    d1(i) = (0.d0, 0.d0)
+  enddo
+
+  n_pt_out = 0
+  call multiply_cpoly(d, n_pt_tmp, d3, n_pt3, d1, n_pt_out)
+  do i = 0, n_pt_out
+    d(i) = d1(i)
+  enddo
+
+end subroutine give_cpolynomial_mult_center_one_e
+
+! ---
+
+recursive subroutine I_x1_pol_mult_one_e_cosgtos(a, c, R1x, R1xp, R2x, d, nd, n_pt_in)
+
+  BEGIN_DOC
+  !  Recursive routine involved in the electron-nucleus potential
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: a, c, n_pt_in
+  complex*16, intent(in)    :: R1x(0:2), R1xp(0:2), R2x(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:n_pt_in)
+
+  integer                   :: nx, ix, dim, iy, ny
+  complex*16                :: X(0:max_dim)
+  complex*16                :: Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
+
+  dim = n_pt_in
+
+  if( (a==0) .and. (c==0)) then
+
+    nd   = 0
+    d(0) = (1.d0, 0.d0)
+    return
+
+  elseif( (c < 0) .or. (nd < 0) ) then
+
+    nd = -1
+    return
+
+  elseif((a == 0) .and. (c .ne. 0)) then
+
+    call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, n_pt_in)
+
+  elseif(a == 1) then
+
+    nx = nd
+    do ix = 0, n_pt_in
+      X(ix) = (0.d0, 0.d0)
+      Y(ix) = (0.d0, 0.d0)
+    enddo
+
+    call I_x2_pol_mult_one_e_cosgtos(c-1, R1x, R1xp, R2x, X, nx, n_pt_in)
+
+    do ix = 0, nx
+      X(ix) *= dble(c)
+    enddo
+
+    call multiply_cpoly(X, nx, R2x, 2, d, nd)
+
+    ny = 0
+    call I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, Y, ny, n_pt_in)
+    call multiply_cpoly(Y, ny, R1x, 2, d, nd)
+
+  else
+
+    nx = 0
+    do ix = 0, n_pt_in
+      X(ix) = (0.d0, 0.d0)
+      Y(ix) = (0.d0, 0.d0)
+    enddo
+
+    call I_x1_pol_mult_one_e_cosgtos(a-2, c, R1x, R1xp, R2x, X, nx, n_pt_in)
+
+    do ix = 0, nx
+      X(ix) *= dble(a-1)
+    enddo
+    call multiply_cpoly(X, nx, R2x, 2, d, nd)
+
+    nx = nd
+    do ix = 0, n_pt_in
+      X(ix) = (0.d0, 0.d0)
+    enddo
+
+    call I_x1_pol_mult_one_e_cosgtos(a-1, c-1, R1x, R1xp, R2x, X, nx, n_pt_in)
+    do ix = 0, nx
+      X(ix) *= dble(c)
+    enddo
+
+    call multiply_cpoly(X, nx, R2x, 2, d, nd)
+
+    ny = 0
+    call I_x1_pol_mult_one_e_cosgtos(a-1, c, R1x, R1xp, R2x, Y, ny, n_pt_in)
+    call multiply_cpoly(Y, ny, R1x, 2, d, nd)
+
+  endif
+
+end subroutine I_x1_pol_mult_one_e_cosgtos
+
+! ---
+
+recursive subroutine I_x2_pol_mult_one_e_cosgtos(c, R1x, R1xp, R2x, d, nd, dim)
+
+  BEGIN_DOC
+  !  Recursive routine involved in the electron-nucleus potential
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: dim, c
+  complex*16, intent(in)    :: R1x(0:2), R1xp(0:2), R2x(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(out)   :: d(0:max_dim)
+
+  integer                   :: i, nx, ix, ny
+  complex*16                :: X(0:max_dim), Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
+
+  if(c == 0) then
+
+    nd = 0
+    d(0) = (1.d0, 0.d0)
+    return
+
+  elseif((nd < 0) .or. (c < 0)) then
+
+    nd = -1
+    return
+
+  else
+
+    nx = 0
+    do ix = 0, dim
+      X(ix) = (0.d0, 0.d0)
+      Y(ix) = (0.d0, 0.d0)
+    enddo
+
+    call I_x1_pol_mult_one_e_cosgtos(0, c-2, R1x, R1xp, R2x, X, nx, dim)
+
+    do ix = 0, nx
+      X(ix) *= dble(c-1)
+    enddo
+
+    call multiply_cpoly(X, nx, R2x, 2, d, nd)
+
+    ny = 0
+    do ix = 0, dim
+      Y(ix) = (0.d0, 0.d0)
+    enddo
+
+    call I_x1_pol_mult_one_e_cosgtos(0, c-1, R1x, R1xp, R2x, Y, ny, dim)
+
+    if(ny .ge. 0) then
+      call multiply_cpoly(Y, ny, R1xp, 2, d, nd)
+    endif
+
+  endif
+
+end subroutine I_x2_pol_mult_one_e_cosgtos
+
+! ---
+
+complex*16 function V_n_e_cosgtos(a_x, a_y, a_z, b_x, b_y, b_z, alpha, beta)
+
+  BEGIN_DOC
+  ! Primitve nuclear attraction between the two primitves centered on the same atom.
+  !
+  ! $p_1 = x^{a_x} y^{a_y} z^{a_z} \exp(-\alpha r^2)$
+  !
+  ! $p_2 = x^{b_x} y^{b_y} z^{b_z} \exp(-\beta  r^2)$
+  END_DOC
+
+  implicit none
+
+  integer,    intent(in) :: a_x, a_y, a_z, b_x, b_y, b_z
+  complex*16, intent(in) :: alpha, beta
+
+  double precision       :: V_phi, V_theta
+  complex*16             :: V_r_cosgtos
+
+  if( (iand(a_x + b_x, 1) == 1) .or. &
+      (iand(a_y + b_y, 1) == 1) .or. &
+      (iand(a_z + b_z, 1) == 1) ) then
+
+    V_n_e_cosgtos = (0.d0, 0.d0)
+
+  else
+
+    V_n_e_cosgtos = V_r_cosgtos(a_x + b_x + a_y + b_y + a_z + b_z + 1, alpha + beta) &
+                  * V_phi(a_x + b_x, a_y + b_y)                                      &
+                  * V_theta(a_z + b_z, a_x + b_x + a_y + b_y + 1)
+  endif
+
+end function V_n_e_cosgtos
+
+! ---
+
+complex*16 function V_r_cosgtos(n, alpha)
+
+  BEGIN_DOC
+  ! Computes the radial part of the nuclear attraction integral:
+  !
+  ! $\int_{0}^{\infty} r^n  \exp(-\alpha  r^2)  dr$
+  !
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer   , intent(in) :: n
+  complex*16, intent(in) :: alpha
+
+  double precision       :: fact
+
+  if(iand(n, 1) .eq. 1) then
+    V_r_cosgtos = 0.5d0 * fact(shiftr(n, 1)) / (alpha**(shiftr(n, 1) + 1))
+  else
+    V_r_cosgtos = sqpi * fact(n) / fact(shiftr(n, 1)) * (0.5d0/zsqrt(alpha))**(n+1)
+  endif
+
+end function V_r_cosgtos
+
+! ---
+
diff --git a/src/cosgtos_ao_int/one_e_kin_integrals.irp.f b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f
new file mode 100644
index 00000000..710b04d4
--- /dev/null
+++ b/src/cosgtos_ao_int/one_e_kin_integrals.irp.f
@@ -0,0 +1,223 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_x, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_y, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_deriv2_cosgtos_z, (ao_num, ao_num) ]
+
+  implicit none
+  integer          :: i, j, n, l, dim1, power_A(3), power_B(3)
+  double precision :: c, deriv_tmp
+  complex*16       :: alpha, beta, A_center(3), B_center(3)
+  complex*16       :: overlap_x, overlap_y, overlap_z, overlap
+  complex*16       :: overlap_x0_1, overlap_y0_1, overlap_z0_1
+  complex*16       :: overlap_x0_2, overlap_y0_2, overlap_z0_2 
+  complex*16       :: overlap_m2_1, overlap_p2_1
+  complex*16       :: overlap_m2_2, overlap_p2_2
+  complex*16       :: deriv_tmp_1, deriv_tmp_2
+
+
+  dim1 = 100
+
+  ! -- Dummy call to provide everything
+
+  A_center(:) = (0.0d0, 0.d0)
+  B_center(:) = (1.0d0, 0.d0)
+  alpha       = (1.0d0, 0.d0)
+  beta        = (0.1d0, 0.d0)
+  power_A     = 1
+  power_B     = 0
+  call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B       &
+                            , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 )
+
+  ! ---
+
+ !$OMP PARALLEL DO SCHEDULE(GUIDED)                                                                  &
+ !$OMP DEFAULT(NONE)                                                                                 &
+ !$OMP PRIVATE( A_center, B_center, power_A, power_B, alpha, beta, i, j, l, n, c                     &
+ !$OMP        , deriv_tmp, deriv_tmp_1, deriv_tmp_2                                                  &
+ !$OMP        , overlap_x, overlap_y, overlap_z, overlap                                             &
+ !$OMP        , overlap_m2_1, overlap_p2_1, overlap_m2_2, overlap_p2_2                               &
+ !$OMP        , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap_x0_2, overlap_y0_2, overlap_z0_2 ) &
+ !$OMP SHARED( nucl_coord, ao_power, ao_prim_num, ao_num, ao_nucl, dim1                              &
+ !$OMP       , ao_coef_norm_ord_transp_cosgtos, ao_expo_ord_transp_cosgtos                           & 
+ !$OMP       , ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z ) 
+
+  do j = 1, ao_num
+    A_center(1) = nucl_coord(ao_nucl(j),1) * (1.d0, 0.d0)
+    A_center(2) = nucl_coord(ao_nucl(j),2) * (1.d0, 0.d0)
+    A_center(3) = nucl_coord(ao_nucl(j),3) * (1.d0, 0.d0)
+    power_A(1)  = ao_power(j,1)
+    power_A(2)  = ao_power(j,2)
+    power_A(3)  = ao_power(j,3)
+
+    do i = 1, ao_num
+      B_center(1) = nucl_coord(ao_nucl(i),1) * (1.d0, 0.d0) 
+      B_center(2) = nucl_coord(ao_nucl(i),2) * (1.d0, 0.d0)
+      B_center(3) = nucl_coord(ao_nucl(i),3) * (1.d0, 0.d0)
+      power_B(1)  = ao_power(i,1)
+      power_B(2)  = ao_power(i,2)
+      power_B(3)  = ao_power(i,3)
+
+      ao_deriv2_cosgtos_x(i,j) = 0.d0
+      ao_deriv2_cosgtos_y(i,j) = 0.d0
+      ao_deriv2_cosgtos_z(i,j) = 0.d0
+
+      do n = 1, ao_prim_num(j)
+        alpha = ao_expo_ord_transp_cosgtos(n,j)
+
+        do l = 1, ao_prim_num(i)
+          c    = ao_coef_norm_ord_transp_cosgtos(n,j) * ao_coef_norm_ord_transp_cosgtos(l,i)
+          beta = ao_expo_ord_transp_cosgtos(l,i)
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B       &
+                                    , overlap_x0_1, overlap_y0_1, overlap_z0_1, overlap, dim1 )
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                    , overlap_x0_2, overlap_y0_2, overlap_z0_2, overlap, dim1  )
+
+          ! ---
+
+          power_A(1) = power_A(1) - 2
+          if(power_A(1) > -1) then
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                      , overlap_m2_1, overlap_y, overlap_z, overlap, dim1 )
+
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                      , overlap_m2_2, overlap_y, overlap_z, overlap, dim1        )
+          else
+            overlap_m2_1 = (0.d0, 0.d0)
+            overlap_m2_2 = (0.d0, 0.d0)
+          endif
+
+          power_A(1) = power_A(1) + 4
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                    , overlap_p2_1, overlap_y, overlap_z, overlap, dim1 )
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                    , overlap_p2_2, overlap_y, overlap_z, overlap, dim1        )
+
+          power_A(1) = power_A(1) - 2
+
+          deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_1 &
+                        + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_1           &
+                        + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_y0_1 * overlap_z0_1
+
+          deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(1) + 1.d0) * overlap_x0_2 &
+                        + power_A(1) * (power_A(1) - 1.d0) * overlap_m2_2           &
+                        + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_y0_2 * overlap_z0_2
+
+          deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2)
+
+          ao_deriv2_cosgtos_x(i,j) += c * deriv_tmp
+
+          ! ---
+
+          power_A(2) = power_A(2) - 2
+          if(power_A(2) > -1) then
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                      , overlap_x, overlap_m2_1, overlap_y, overlap, dim1 )
+
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                      , overlap_x, overlap_m2_2, overlap_y, overlap, dim1        )
+          else
+            overlap_m2_1 = (0.d0, 0.d0)
+            overlap_m2_2 = (0.d0, 0.d0)
+          endif
+
+          power_A(2) = power_A(2) + 4
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                    , overlap_x, overlap_p2_1, overlap_y, overlap, dim1 )
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                    , overlap_x, overlap_p2_2, overlap_y, overlap, dim1        )
+
+          power_A(2) = power_A(2) - 2
+
+          deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_1 &
+                        + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_1           &
+                        + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_z0_1
+
+          deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(2) + 1.d0) * overlap_y0_2 &
+                        + power_A(2) * (power_A(2) - 1.d0) * overlap_m2_2           &
+                        + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_z0_2
+
+          deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2)
+
+          ao_deriv2_cosgtos_y(i,j) += c * deriv_tmp
+
+          ! ---
+
+          power_A(3) = power_A(3) - 2
+          if(power_A(3) > -1) then
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                      , overlap_x, overlap_y, overlap_m2_1, overlap, dim1 )
+
+            call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                      , overlap_x, overlap_y, overlap_m2_2, overlap, dim1        )
+          else
+            overlap_m2_1 = (0.d0, 0.d0)
+            overlap_m2_2 = (0.d0, 0.d0)
+          endif
+
+          power_A(3) = power_A(3) + 4
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, beta, power_A, power_B &
+                                    , overlap_x, overlap_y, overlap_p2_1, overlap, dim1 )
+
+          call overlap_cgaussian_xyz( A_center, B_center, alpha, conjg(beta), power_A, power_B &
+                                    , overlap_x, overlap_y, overlap_p2_2, overlap, dim1        )
+
+          power_A(3) = power_A(3) - 2
+          
+          deriv_tmp_1 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_1 &
+                        + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_1           &
+                        + 4.d0 * alpha * alpha * overlap_p2_1 ) * overlap_x0_1 * overlap_y0_1
+
+          deriv_tmp_2 = ( -2.d0 * alpha * (2.d0 * power_A(3) + 1.d0) * overlap_z0_2 &
+                        + power_A(3) * (power_A(3) - 1.d0) * overlap_m2_2           &
+                        + 4.d0 * alpha * alpha * overlap_p2_2 ) * overlap_x0_2 * overlap_y0_2
+
+          deriv_tmp = 2.d0 * real(deriv_tmp_1 + deriv_tmp_2)
+
+          ao_deriv2_cosgtos_z(i,j) += c * deriv_tmp
+
+          ! ---
+
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [double precision, ao_kinetic_integrals_cosgtos, (ao_num, ao_num)]
+
+  BEGIN_DOC
+  ! 
+  ! Kinetic energy integrals in the cosgtos |AO| basis.
+  !
+  ! $\langle \chi_i |\hat{T}| \chi_j \rangle$
+  !
+  END_DOC
+
+  implicit none
+  integer :: i, j
+
+ !$OMP PARALLEL DO DEFAULT(NONE) &
+ !$OMP  PRIVATE(i, j)            &
+ !$OMP  SHARED(ao_num, ao_kinetic_integrals_cosgtos, ao_deriv2_cosgtos_x, ao_deriv2_cosgtos_y, ao_deriv2_cosgtos_z)
+  do j = 1, ao_num
+    do i = 1, ao_num
+      ao_kinetic_integrals_cosgtos(i,j) = -0.5d0 * ( ao_deriv2_cosgtos_x(i,j) &
+                                                   + ao_deriv2_cosgtos_y(i,j) &
+                                                   + ao_deriv2_cosgtos_z(i,j) )
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
diff --git a/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f
new file mode 100644
index 00000000..527a98d5
--- /dev/null
+++ b/src/cosgtos_ao_int/two_e_Coul_integrals.irp.f
@@ -0,0 +1,1584 @@
+
+! ---
+
+double precision function ao_two_e_integral_cosgtos(i, j, k, l)
+
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) 1/r12 k(r2) l(r2)
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer, intent(in) :: i, j, k, l
+
+  integer             :: p, q, r, s
+  integer             :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3)
+  integer             :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3)
+  double precision    :: coef1, coef2, coef3, coef4
+  complex*16          :: I_center(3), J_center(3), K_center(3), L_center(3)
+  complex*16          :: expo1, expo2, expo3, expo4
+  complex*16          :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  complex*16          :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  complex*16          :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv
+  complex*16          :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv
+  complex*16          :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  complex*16          :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  complex*16          :: integral1, integral2, integral3, integral4
+  complex*16          :: integral5, integral6, integral7, integral8
+  complex*16          :: integral_tot
+
+  double precision    :: ao_two_e_integral_cosgtos_schwartz_accel
+  complex*16          :: ERI_cosgtos
+  complex*16          :: general_primitive_integral_cosgtos
+
+  if(ao_prim_num(i) * ao_prim_num(j) * ao_prim_num(k) * ao_prim_num(l) > 1024) then
+
+    !print *, ' with shwartz acc '
+    ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l)
+
+  else
+    !print *, ' without shwartz acc '
+
+    dim1 = n_pt_max_integrals
+
+    num_i = ao_nucl(i)
+    num_j = ao_nucl(j)
+    num_k = ao_nucl(k)
+    num_l = ao_nucl(l)
+
+    ao_two_e_integral_cosgtos = 0.d0
+
+    if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then
+      !print *, ' not the same center'
+
+      do p = 1, 3
+        I_power(p)  = ao_power(i,p)
+        J_power(p)  = ao_power(j,p)
+        K_power(p)  = ao_power(k,p)
+        L_power(p)  = ao_power(l,p)
+        I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0) 
+        J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0)
+        K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0)
+        L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0)
+      enddo
+
+      do p = 1, ao_prim_num(i)
+        coef1 = ao_coef_norm_ord_transp_cosgtos(p,i)
+        expo1 = ao_expo_ord_transp_cosgtos(p,i) 
+
+        do q = 1, ao_prim_num(j)
+          coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j)
+          expo2 = ao_expo_ord_transp_cosgtos(q,j) 
+
+          call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1               &
+                                                , expo1, expo2, I_power, J_power, I_center, J_center, dim1 )
+          p1_inv = (1.d0,0.d0) / pp1
+
+          call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2                      &
+                                                , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 )
+          p2_inv = (1.d0,0.d0) / pp2
+
+          call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3                      &
+                                                , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 )
+          p3_inv = (1.d0,0.d0) / pp3
+
+          call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4                             &
+                                                , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 )
+          p4_inv = (1.d0,0.d0) / pp4
+
+          !integer :: ii
+          !do ii = 1, 3
+          !  print *, 'fact_p1', fact_p1
+          !  print *, 'fact_p2', fact_p2
+          !  print *, 'fact_p3', fact_p3
+          !  print *, 'fact_p4', fact_p4
+          !  !print *, pp1, p1_inv
+          !  !print *, pp2, p2_inv
+          !  !print *, pp3, p3_inv
+          !  !print *, pp4, p4_inv
+          !enddo 
+          !  if( abs(aimag(P1_center(ii))) .gt. 0.d0 ) then
+          !    print *, ' P_1 is complex !!'
+          !    print *, P1_center
+          !    print *, expo1, expo2
+          !    print *, conjg(expo1), conjg(expo2)
+          !    stop
+          !  endif
+          !  if( abs(aimag(P2_center(ii))) .gt. 0.d0 ) then
+          !    print *, ' P_2 is complex !!'
+          !    print *, P2_center
+          !    print *, ' old expos:'
+          !    print *, expo1, expo2
+          !    print *, conjg(expo1), conjg(expo2)
+          !    print *, ' new expo:'
+          !    print *, pp2, p2_inv
+          !    print *, ' factor:'
+          !    print *, fact_p2
+          !    print *, ' old centers:'
+          !    print *, I_center, J_center
+          !    print *, ' powers:'
+          !    print *, I_power, J_power
+          !    stop
+          !  endif
+          !  if( abs(aimag(P3_center(ii))) .gt. 0.d0 ) then
+          !    print *, ' P_3 is complex !!'
+          !    print *, P3_center
+          !    print *, expo1, expo2
+          !    print *, conjg(expo1), conjg(expo2)
+          !    stop
+          !  endif
+          !  if( abs(aimag(P4_center(ii))) .gt. 0.d0 ) then
+          !    print *, ' P_4 is complex !!'
+          !    print *, P4_center
+          !    print *, expo1, expo2
+          !    print *, conjg(expo1), conjg(expo2)
+          !    stop
+          !  endif
+          !enddo
+
+          do r = 1, ao_prim_num(k)
+            coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k)
+            expo3 = ao_expo_ord_transp_cosgtos(r,k) 
+
+            do s = 1, ao_prim_num(l)
+              coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l)
+              expo4 = ao_expo_ord_transp_cosgtos(s,l) 
+
+              call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1               &
+                                                    , expo3, expo4, K_power, L_power, K_center, L_center, dim1 )
+              q1_inv = (1.d0,0.d0) / qq1
+
+              call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2               &
+                                                    , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 )
+              q2_inv = (1.d0,0.d0) / qq2
+
+              !do ii = 1, 3
+              !  !print *, qq1, q1_inv
+              !  !print *, qq2, q2_inv
+              !  print *, 'fact_q1', fact_q1
+              !  print *, 'fact_q2', fact_q2
+              !enddo 
+              !  if( abs(aimag(Q1_center(ii))) .gt. 0.d0 ) then
+              !    print *, ' Q_1 is complex !!'
+              !    print *, Q1_center
+              !    print *, expo3, expo4
+              !    print *, conjg(expo3), conjg(expo4)
+              !    stop
+              !  endif
+              !  if( abs(aimag(Q2_center(ii))) .gt. 0.d0 ) then
+              !    print *, ' Q_2 is complex !!'
+              !    print *, Q2_center
+              !    print *, expo3, expo4
+              !    print *, conjg(expo3), conjg(expo4)
+              !    stop
+              !  endif
+              !enddo
+
+
+              integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                                  , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+              integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                                  , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+              integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                                  , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+              integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                                  , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+              integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                                  , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+              integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                                  , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+              integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                                  , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+              integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                                  , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+              integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+              !integral_tot = integral1
+              !print*, integral_tot
+
+              ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot)
+            enddo ! s
+          enddo  ! r
+        enddo   ! q
+      enddo    ! p
+
+    else
+      !print *, ' the same center'
+
+      do p = 1, 3
+        I_power(p) = ao_power(i,p)
+        J_power(p) = ao_power(j,p)
+        K_power(p) = ao_power(k,p)
+        L_power(p) = ao_power(l,p)
+      enddo
+
+      do p = 1, ao_prim_num(i)
+        coef1 = ao_coef_norm_ord_transp_cosgtos(p,i)
+        expo1 = ao_expo_ord_transp_cosgtos(p,i) 
+
+        do q = 1, ao_prim_num(j)
+          coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j)
+          expo2 = ao_expo_ord_transp_cosgtos(q,j) 
+
+          do r = 1, ao_prim_num(k)
+            coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k)
+            expo3 = ao_expo_ord_transp_cosgtos(r,k) 
+
+            do s = 1, ao_prim_num(l)
+              coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l)
+              expo4 = ao_expo_ord_transp_cosgtos(s,l) 
+
+              integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4                     &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4              &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4              &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4       &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4              &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4       &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4       &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+              integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 &
+                                     , I_power(1), J_power(1), K_power(1), L_power(1)  &
+                                     , I_power(2), J_power(2), K_power(2), L_power(2)  &
+                                     , I_power(3), J_power(3), K_power(3), L_power(3)  )
+
+              integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+
+              ao_two_e_integral_cosgtos = ao_two_e_integral_cosgtos + coef4 * 2.d0 * real(integral_tot)
+            enddo ! s
+          enddo  ! r
+        enddo   ! q
+      enddo    ! p
+
+    endif
+  endif
+
+end function ao_two_e_integral_cosgtos
+
+! ---
+
+double precision function ao_two_e_integral_cosgtos_schwartz_accel(i, j, k, l)
+
+  BEGIN_DOC
+  !  integral of the AO basis <ik|jl> or (ij|kl)
+  !     i(r1) j(r1) 1/r12 k(r2) l(r2)
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer, intent(in)           :: i, j, k, l
+
+  integer                       :: p, q, r, s
+  integer                       :: num_i, num_j, num_k, num_l, dim1, I_power(3), J_power(3), K_power(3), L_power(3)
+  integer                       :: iorder_p1(3), iorder_p2(3), iorder_p3(3), iorder_p4(3), iorder_q1(3), iorder_q2(3)
+  double precision              :: coef1, coef2, coef3, coef4
+  complex*16                    :: I_center(3), J_center(3), K_center(3), L_center(3)
+  complex*16                    :: expo1, expo2, expo3, expo4
+  complex*16                    :: P1_new(0:max_dim,3), P1_center(3), fact_p1, pp1, p1_inv
+  complex*16                    :: P2_new(0:max_dim,3), P2_center(3), fact_p2, pp2, p2_inv
+  complex*16                    :: P3_new(0:max_dim,3), P3_center(3), fact_p3, pp3, p3_inv
+  complex*16                    :: P4_new(0:max_dim,3), P4_center(3), fact_p4, pp4, p4_inv
+  complex*16                    :: Q1_new(0:max_dim,3), Q1_center(3), fact_q1, qq1, q1_inv
+  complex*16                    :: Q2_new(0:max_dim,3), Q2_center(3), fact_q2, qq2, q2_inv
+  complex*16                    :: integral1, integral2, integral3, integral4
+  complex*16                    :: integral5, integral6, integral7, integral8
+  complex*16                    :: integral_tot
+
+  double precision, allocatable :: schwartz_kl(:,:)
+  double precision              :: thr
+  double precision              :: schwartz_ij
+
+  complex*16                    :: ERI_cosgtos
+  complex*16                    :: general_primitive_integral_cosgtos
+
+  ao_two_e_integral_cosgtos_schwartz_accel = 0.d0
+
+  dim1 = n_pt_max_integrals
+
+  num_i = ao_nucl(i)
+  num_j = ao_nucl(j)
+  num_k = ao_nucl(k)
+  num_l = ao_nucl(l)
+
+
+  thr = ao_integrals_threshold*ao_integrals_threshold
+
+  allocate( schwartz_kl(0:ao_prim_num(l),0:ao_prim_num(k)) )
+
+  if(num_i /= num_j .or. num_k /= num_l .or. num_j /= num_k) then
+
+    do p = 1, 3
+      I_power(p)  = ao_power(i,p)
+      J_power(p)  = ao_power(j,p)
+      K_power(p)  = ao_power(k,p)
+      L_power(p)  = ao_power(l,p)
+      I_center(p) = nucl_coord(num_i,p) * (1.d0, 0.d0)
+      J_center(p) = nucl_coord(num_j,p) * (1.d0, 0.d0)
+      K_center(p) = nucl_coord(num_k,p) * (1.d0, 0.d0)
+      L_center(p) = nucl_coord(num_l,p) * (1.d0, 0.d0)
+    enddo
+
+    schwartz_kl(0,0) = 0.d0
+    do r = 1, ao_prim_num(k)
+      coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k)
+      expo1 = ao_expo_ord_transp_cosgtos(r,k) 
+
+      schwartz_kl(0,r) = 0.d0
+      do s = 1, ao_prim_num(l)
+        coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l)
+        expo2 = ao_expo_ord_transp_cosgtos(s,l) 
+
+        call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1               &
+                                              , expo1, expo2, K_power, L_power, K_center, L_center, dim1 )
+        p1_inv = (1.d0,0.d0) / pp1
+
+        call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2                      &
+                                              , conjg(expo1), expo2, K_power, L_power, K_center, L_center, dim1 )
+        p2_inv = (1.d0,0.d0) / pp2
+
+        call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3                      &
+                                              , expo1, conjg(expo2), K_power, L_power, K_center, L_center, dim1 )
+        p3_inv = (1.d0,0.d0) / pp3
+
+        call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4                             &
+                                              , conjg(expo1), conjg(expo2), K_power, L_power, K_center, L_center, dim1 )
+        p4_inv = (1.d0,0.d0) / pp4
+
+        integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+
+        schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot)
+
+        schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r))
+      enddo
+
+      schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0))
+    enddo
+
+
+    do p = 1, ao_prim_num(i)
+      coef1 = ao_coef_norm_ord_transp_cosgtos(p,i)
+      expo1 = ao_expo_ord_transp_cosgtos(p,i) 
+
+      do q = 1, ao_prim_num(j)
+        coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j)
+        expo2 = ao_expo_ord_transp_cosgtos(q,j) 
+
+        call give_explicit_cpoly_and_cgaussian( P1_new, P1_center, pp1, fact_p1, iorder_p1               &
+                                              , expo1, expo2, I_power, J_power, I_center, J_center, dim1 )
+        p1_inv = (1.d0,0.d0) / pp1
+
+        call give_explicit_cpoly_and_cgaussian( P2_new, P2_center, pp2, fact_p2, iorder_p2                      &
+                                              , conjg(expo1), expo2, I_power, J_power, I_center, J_center, dim1 )
+        p2_inv = (1.d0,0.d0) / pp2
+
+        call give_explicit_cpoly_and_cgaussian( P3_new, P3_center, pp3, fact_p3, iorder_p3                      &
+                                              , expo1, conjg(expo2), I_power, J_power, I_center, J_center, dim1 )
+        p3_inv = (1.d0,0.d0) / pp3
+
+        call give_explicit_cpoly_and_cgaussian( P4_new, P4_center, pp4, fact_p4, iorder_p4                             &
+                                              , conjg(expo1), conjg(expo2), I_power, J_power, I_center, J_center, dim1 )
+        p4_inv = (1.d0,0.d0) / pp4
+
+        integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                            , P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 )
+
+        integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                            , P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 )
+
+        integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+        schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot)
+
+        if(schwartz_kl(0,0)*schwartz_ij < thr) cycle
+
+        do r = 1, ao_prim_num(k)
+          if(schwartz_kl(0,r)*schwartz_ij < thr) cycle
+
+          coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k)
+          expo3 = ao_expo_ord_transp_cosgtos(r,k)
+
+          do s = 1, ao_prim_num(l)
+            if(schwartz_kl(s,r)*schwartz_ij < thr) cycle
+
+            coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l)
+            expo4 = ao_expo_ord_transp_cosgtos(s,l)
+
+            call give_explicit_cpoly_and_cgaussian( Q1_new, Q1_center, qq1, fact_q1, iorder_q1               &
+                                                  , expo3, expo4, K_power, L_power, K_center, L_center, dim1 )
+            q1_inv = (1.d0,0.d0) / qq1
+
+            call give_explicit_cpoly_and_cgaussian( Q2_new, Q2_center, qq2, fact_q2, iorder_q2               &
+                                                  , conjg(expo3), expo4, K_power, L_power, K_center, L_center, dim1 )
+            q2_inv = (1.d0,0.d0) / qq2
+
+            integral1 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                                , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+            integral2 = general_primitive_integral_cosgtos( dim1, P1_new, P1_center, fact_p1, pp1, p1_inv, iorder_p1 &
+                                                                , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+            integral3 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                                , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+            integral4 = general_primitive_integral_cosgtos( dim1, P2_new, P2_center, fact_p2, pp2, p2_inv, iorder_p2 &
+                                                                , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+
+            integral5 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                                , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+            integral6 = general_primitive_integral_cosgtos( dim1, P3_new, P3_center, fact_p3, pp3, p3_inv, iorder_p3 &
+                                                                , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+            integral7 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                                , Q1_new, Q1_center, fact_q1, qq1, q1_inv, iorder_q1 )
+
+            integral8 = general_primitive_integral_cosgtos( dim1, P4_new, P4_center, fact_p4, pp4, p4_inv, iorder_p4 &
+                                                                , Q2_new, Q2_center, fact_q2, qq2, q2_inv, iorder_q2 )
+
+            integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+            ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel &
+                                                     + coef4 * 2.d0 * real(integral_tot)
+          enddo ! s
+        enddo  ! r
+      enddo   ! q
+    enddo    ! p
+
+  else
+
+    do p = 1, 3
+      I_power(p) = ao_power(i,p)
+      J_power(p) = ao_power(j,p)
+      K_power(p) = ao_power(k,p)
+      L_power(p) = ao_power(l,p)
+    enddo
+
+    schwartz_kl(0,0) = 0.d0
+    do r = 1, ao_prim_num(k)
+      coef1 = ao_coef_norm_ord_transp_cosgtos(r,k) * ao_coef_norm_ord_transp_cosgtos(r,k)
+      expo1 = ao_expo_ord_transp_cosgtos(r,k)
+
+      schwartz_kl(0,r) = 0.d0
+      do s = 1, ao_prim_num(l)
+        coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(s,l) * ao_coef_norm_ord_transp_cosgtos(s,l)
+        expo2 = ao_expo_ord_transp_cosgtos(s,l)
+
+        integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2                     &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+        integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2              &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+
+        integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2              &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+        integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2       &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+
+        integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2              &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+        integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2       &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+
+        integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2       &
+                               , K_power(1), L_power(1), K_power(1), L_power(1) &
+                               , K_power(2), L_power(2), K_power(2), L_power(2) &
+                               , K_power(3), L_power(3), K_power(3), L_power(3) )
+        integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 &
+                               , K_power(1), L_power(1), K_power(1), L_power(1)  &
+                               , K_power(2), L_power(2), K_power(2), L_power(2)  &
+                               , K_power(3), L_power(3), K_power(3), L_power(3)  )
+
+        integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+
+        schwartz_kl(s,r) = coef2 * 2.d0 * real(integral_tot)
+
+        schwartz_kl(0,r) = max(schwartz_kl(0,r), schwartz_kl(s,r))
+      enddo
+      schwartz_kl(0,0) = max(schwartz_kl(0,r), schwartz_kl(0,0))
+    enddo
+
+    do p = 1, ao_prim_num(i)
+      coef1 = ao_coef_norm_ord_transp_cosgtos(p,i)
+      expo1 = ao_expo_ord_transp_cosgtos(p,i)
+
+      do q = 1, ao_prim_num(j)
+        coef2 = coef1 * ao_coef_norm_ord_transp_cosgtos(q,j)
+        expo2 = ao_expo_ord_transp_cosgtos(q,j)
+
+        integral1 = ERI_cosgtos( expo1, expo2, expo1, expo2                     &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral2 = ERI_cosgtos( expo1, expo2, conjg(expo1), expo2              &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral3 = ERI_cosgtos( conjg(expo1), expo2, expo1, expo2              &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo1), expo2       &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral5 = ERI_cosgtos( expo1, conjg(expo2), expo1, expo2              &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo1), expo2       &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo1, expo2       &
+                               , I_power(1), J_power(1), I_power(1), J_power(1) &
+                               , I_power(2), J_power(2), I_power(2), J_power(2) &
+                               , I_power(3), J_power(3), I_power(3), J_power(3) )
+
+        integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo1), expo2 &
+                               , I_power(1), J_power(1), I_power(1), J_power(1)  &
+                               , I_power(2), J_power(2), I_power(2), J_power(2)  &
+                               , I_power(3), J_power(3), I_power(3), J_power(3)  )
+
+        integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+        schwartz_ij = coef2 * coef2 * 2.d0 * real(integral_tot)
+
+        if(schwartz_kl(0,0)*schwartz_ij < thr) cycle
+        do r = 1, ao_prim_num(k)
+          if(schwartz_kl(0,r)*schwartz_ij < thr) cycle
+
+          coef3 = coef2 * ao_coef_norm_ord_transp_cosgtos(r,k)
+          expo3 = ao_expo_ord_transp_cosgtos(r,k)
+
+          do s = 1, ao_prim_num(l)
+            if(schwartz_kl(s,r)*schwartz_ij < thr) cycle
+
+            coef4 = coef3 * ao_coef_norm_ord_transp_cosgtos(s,l)
+            expo4 = ao_expo_ord_transp_cosgtos(s,l)
+
+            integral1 = ERI_cosgtos( expo1, expo2, expo3, expo4                     &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral2 = ERI_cosgtos( expo1, expo2, conjg(expo3), expo4              &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral3 = ERI_cosgtos( conjg(expo1), expo2, expo3, expo4              &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral4 = ERI_cosgtos( conjg(expo1), expo2, conjg(expo3), expo4       &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral5 = ERI_cosgtos( expo1, conjg(expo2), expo3, expo4              &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral6 = ERI_cosgtos( expo1, conjg(expo2), conjg(expo3), expo4       &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral7 = ERI_cosgtos( conjg(expo1), conjg(expo2), expo3, expo4       &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1) &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2) &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3) )
+
+            integral8 = ERI_cosgtos( conjg(expo1), conjg(expo2), conjg(expo3), expo4 &
+                                   , I_power(1), J_power(1), K_power(1), L_power(1)  &
+                                   , I_power(2), J_power(2), K_power(2), L_power(2)  &
+                                   , I_power(3), J_power(3), K_power(3), L_power(3)  )
+
+            integral_tot = integral1 + integral2 + integral3 + integral4 + integral5 + integral6 + integral7 + integral8
+
+            ao_two_e_integral_cosgtos_schwartz_accel = ao_two_e_integral_cosgtos_schwartz_accel &
+                                      + coef4 * 2.d0 * real(integral_tot)
+          enddo ! s
+        enddo  ! r
+      enddo   ! q
+    enddo    ! p
+
+  endif
+
+  deallocate(schwartz_kl)
+
+end function ao_two_e_integral_cosgtos_schwartz_accel
+
+! ---
+
+BEGIN_PROVIDER [ double precision, ao_two_e_integral_cosgtos_schwartz, (ao_num,ao_num)  ]
+
+  BEGIN_DOC
+  !  Needed to compute Schwartz inequalities
+  END_DOC
+
+  implicit none
+  integer          :: i, k
+  double precision :: ao_two_e_integral_cosgtos
+
+  ao_two_e_integral_cosgtos_schwartz(1,1) = ao_two_e_integral_cosgtos(1, 1, 1, 1)
+
+ !$OMP PARALLEL DO PRIVATE(i,k)                                       &
+ !$OMP             DEFAULT(NONE)                                      &
+ !$OMP             SHARED(ao_num, ao_two_e_integral_cosgtos_schwartz) &
+ !$OMP             SCHEDULE(dynamic)
+  do i = 1, ao_num
+    do k = 1, i
+      ao_two_e_integral_cosgtos_schwartz(i,k) = dsqrt(ao_two_e_integral_cosgtos(i, i, k, k))
+      ao_two_e_integral_cosgtos_schwartz(k,i) = ao_two_e_integral_cosgtos_schwartz(i,k)
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER
+
+! ---
+
+complex*16 function general_primitive_integral_cosgtos( dim, P_new, P_center, fact_p, p, p_inv, iorder_p &
+                                                           , Q_new, Q_center, fact_q, q, q_inv, iorder_q )
+
+  BEGIN_DOC
+  !
+  ! Computes the integral <pq|rs> where p,q,r,s are cos-cGTOS primitives
+  !
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in) :: dim
+  integer,    intent(in) :: iorder_p(3), iorder_q(3)
+  complex*16, intent(in) :: P_new(0:max_dim,3), P_center(3), fact_p, p, p_inv
+  complex*16, intent(in) :: Q_new(0:max_dim,3), Q_center(3), fact_q, q, q_inv
+
+  integer                :: i, j, nx, ny, nz, n_Ix, n_Iy, n_Iz, iorder, n_pt_tmp, n_pt_out
+  double precision       :: tmp_mod
+  double precision       :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im
+  complex*16             :: pq, pq_inv, pq_inv_2, p01_1, p01_2, p10_1, p10_2, ppq, sq_ppq
+  complex*16             :: rho, dist, const
+  complex*16             :: accu, tmp_p, tmp_q
+  complex*16             :: dx(0:max_dim), Ix_pol(0:max_dim), dy(0:max_dim), Iy_pol(0:max_dim), dz(0:max_dim), Iz_pol(0:max_dim)
+  complex*16             :: d1(0:max_dim), d_poly(0:max_dim)
+
+  complex*16             :: crint_sum
+
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: dx, Ix_pol, dy, Iy_pol, dz, Iz_pol
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: d1, d_poly
+
+  general_primitive_integral_cosgtos = (0.d0, 0.d0)
+
+  pq       = (0.5d0, 0.d0) * p_inv * q_inv
+  pq_inv   = (0.5d0, 0.d0) / (p + q)
+  pq_inv_2 = pq_inv + pq_inv
+  p10_1    = q * pq               ! 1/(2p)
+  p01_1    = p * pq               ! 1/(2q)
+  p10_2    = pq_inv_2 * p10_1 * q ! 0.5d0*q/(pq + p*p)
+  p01_2    = pq_inv_2 * p01_1 * p ! 0.5d0*p/(q*q + pq)
+
+  ! get \sqrt(p + q)
+  !ppq     = p + q
+  !ppq_re  = REAL (ppq)
+  !ppq_im  = AIMAG(ppq)
+  !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im)
+  !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod)
+  !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re 
+  !sq_ppq    = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im
+  sq_ppq = zsqrt(p + q)
+
+  ! ---
+
+  iorder = iorder_p(1) + iorder_q(1) + iorder_p(1) + iorder_q(1)
+
+  do i = 0, iorder
+    Ix_pol(i) = (0.d0, 0.d0)
+  enddo
+
+  n_Ix = 0
+  do i = 0, iorder_p(1)
+
+    tmp_p   = P_new(i,1)
+    tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p))
+    if(tmp_mod < thresh) cycle
+
+    do j = 0, iorder_q(1)
+
+      tmp_q   = tmp_p * Q_new(j,1)
+      tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q))
+      if(tmp_mod < thresh) cycle
+
+      !DIR$ FORCEINLINE
+      call give_cpolynom_mult_center_x(P_center(1), Q_center(1), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dx, nx)
+      !DIR$ FORCEINLINE
+      call add_cpoly_multiply(dx, nx, tmp_q, Ix_pol, n_Ix)
+    enddo
+  enddo
+  if(n_Ix == -1) then
+    return
+  endif
+
+  ! ---
+
+  iorder = iorder_p(2) + iorder_q(2) + iorder_p(2) + iorder_q(2)
+
+  do i = 0, iorder
+    Iy_pol(i) = (0.d0, 0.d0)
+  enddo
+
+  n_Iy = 0
+  do i = 0, iorder_p(2)
+
+    tmp_p   = P_new(i,2)
+    tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p))
+    if(tmp_mod < thresh) cycle
+
+    do j = 0, iorder_q(2)
+
+      tmp_q   = tmp_p * Q_new(j,2)
+      tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q))
+      if(tmp_mod < thresh) cycle
+
+      !DIR$ FORCEINLINE
+      call give_cpolynom_mult_center_x(P_center(2), Q_center(2), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dy, ny)
+      !DIR$ FORCEINLINE
+      call add_cpoly_multiply(dy, ny, tmp_q, Iy_pol, n_Iy)
+    enddo
+  enddo
+
+  if(n_Iy == -1) then
+    return
+  endif
+
+  ! ---
+
+  iorder = iorder_p(3) + iorder_q(3) + iorder_p(3) + iorder_q(3)
+
+  do i = 0, iorder
+    Iz_pol(i) = (0.d0, 0.d0)
+  enddo
+
+  n_Iz = 0
+  do i = 0, iorder_p(3)
+
+    tmp_p   = P_new(i,3)
+    tmp_mod = dsqrt(REAL(tmp_p)*REAL(tmp_p) + AIMAG(tmp_p)*AIMAG(tmp_p))
+    if(tmp_mod < thresh) cycle
+
+    do j = 0, iorder_q(3)
+
+      tmp_q   = tmp_p * Q_new(j,3)
+      tmp_mod = dsqrt(REAL(tmp_q)*REAL(tmp_q) + AIMAG(tmp_q)*AIMAG(tmp_q))
+      if(tmp_mod < thresh) cycle
+
+      !DIR$ FORCEINLINE
+      call give_cpolynom_mult_center_x(P_center(3), Q_center(3), i, j, p, q, iorder, pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, dz, nz)
+      !DIR$ FORCEINLINE
+      call add_cpoly_multiply(dz, nz, tmp_q, Iz_pol, n_Iz)
+    enddo
+  enddo
+
+  if(n_Iz == -1) then
+    return
+  endif
+
+  ! ---
+
+  rho = p * q * pq_inv_2
+  dist = (P_center(1) - Q_center(1)) * (P_center(1) - Q_center(1)) &
+       + (P_center(2) - Q_center(2)) * (P_center(2) - Q_center(2)) &
+       + (P_center(3) - Q_center(3)) * (P_center(3) - Q_center(3))
+  const = dist * rho
+
+  n_pt_tmp = n_Ix + n_Iy
+  do i = 0, n_pt_tmp
+    d_poly(i) = (0.d0, 0.d0)
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(Ix_pol, n_Ix, Iy_pol, n_Iy, d_poly, n_pt_tmp)
+  if(n_pt_tmp == -1) then
+    return
+  endif
+  n_pt_out = n_pt_tmp + n_Iz
+  do i = 0, n_pt_out
+    d1(i) = (0.d0, 0.d0)
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(d_poly, n_pt_tmp, Iz_pol, n_Iz, d1, n_pt_out)
+
+  accu = crint_sum(n_pt_out, const, d1)
+!  print *, n_pt_out, real(d1(0:n_pt_out))
+!  print *, real(accu)
+
+  general_primitive_integral_cosgtos = fact_p * fact_q * accu * pi_5_2 * p_inv * q_inv / sq_ppq
+
+end function general_primitive_integral_cosgtos
+
+! ---
+
+complex*16 function ERI_cosgtos(alpha, beta, delta, gama, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z)
+
+  BEGIN_DOC
+  !  ATOMIC PRIMTIVE two-electron integral between the 4 primitives ::
+  !         primitive_1 = x1**(a_x) y1**(a_y) z1**(a_z) exp(-alpha * r1**2)
+  !         primitive_2 = x1**(b_x) y1**(b_y) z1**(b_z) exp(- beta * r1**2)
+  !         primitive_3 = x2**(c_x) y2**(c_y) z2**(c_z) exp(-delta * r2**2)
+  !         primitive_4 = x2**(d_x) y2**(d_y) z2**(d_z) exp(- gama * r2**2)
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in) :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z
+  complex*16, intent(in) :: delta, gama, alpha, beta
+
+  integer                :: a_x_2, b_x_2, c_x_2, d_x_2, a_y_2, b_y_2, c_y_2, d_y_2, a_z_2, b_z_2, c_z_2, d_z_2
+  integer                :: i, j, k, l, n_pt
+  integer                :: nx, ny, nz
+  double precision       :: ppq_re, ppq_im, ppq_mod, sq_ppq_re, sq_ppq_im
+  complex*16             :: p, q, ppq, sq_ppq, coeff, I_f
+
+  ERI_cosgtos = (0.d0, 0.d0)
+
+  ASSERT (REAL(alpha) >= 0.d0)
+  ASSERT (REAL(beta ) >= 0.d0)
+  ASSERT (REAL(delta) >= 0.d0)
+  ASSERT (REAL(gama ) >= 0.d0)
+
+  nx = a_x + b_x + c_x + d_x
+  if(iand(nx,1) == 1) then
+    ERI_cosgtos = (0.d0, 0.d0)
+    return
+  endif
+
+  ny = a_y + b_y + c_y + d_y
+  if(iand(ny,1) == 1) then
+    ERI_cosgtos = (0.d0, 0.d0)
+    return
+  endif
+
+  nz = a_z + b_z + c_z + d_z
+  if(iand(nz,1) == 1) then
+    ERI_cosgtos = (0.d0, 0.d0)
+    return
+  endif
+
+  n_pt = shiftl(nx+ny+nz, 1)
+
+  p = alpha + beta
+  q = delta + gama
+
+  ! get \sqrt(p + q)
+  !ppq     = p + q
+  !ppq_re  = REAL (ppq)
+  !ppq_im  = AIMAG(ppq)
+  !ppq_mod = dsqrt(ppq_re*ppq_re + ppq_im*ppq_im)
+  !sq_ppq_re = sq_op5 * dsqrt(ppq_re + ppq_mod)
+  !sq_ppq_im = 0.5d0 * ppq_im / sq_ppq_re 
+  !sq_ppq    = sq_ppq_re + (0.d0, 1.d0) * sq_ppq_im
+  sq_ppq = zsqrt(p + q)
+
+  coeff = pi_5_2 / (p * q * sq_ppq)
+  if(n_pt == 0) then
+    ERI_cosgtos = coeff
+    return
+  endif
+
+  call integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt)
+
+  ERI_cosgtos = I_f * coeff
+
+end function ERI_cosgtos
+
+! ---
+
+subroutine integrale_new_cosgtos(I_f, a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z, p, q, n_pt)
+
+  BEGIN_DOC
+  ! Calculates the integral of the polynomial :
+  !
+  ! $I_{x_1}(a_x+b_x, c_x+d_x, p, q) \, I_{x_1}(a_y+b_y, c_y+d_y, p, q) \, I_{x_1}(a_z+b_z, c_z+d_z, p, q)$
+  ! in $( 0 ; 1)$
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)  :: n_pt
+  integer,    intent(in)  :: a_x, b_x, c_x, d_x, a_y, b_y, c_y, d_y, a_z, b_z, c_z, d_z
+  complex*16, intent(out) :: I_f
+
+  integer                 :: i, j, ix, iy, iz, jx, jy, jz, sx, sy, sz
+  complex*16              :: p, q
+  complex*16              :: pq_inv, p10_1, p10_2, p01_1, p01_2, pq_inv_2
+  complex*16              :: B00(n_pt_max_integrals), B10(n_pt_max_integrals), B01(n_pt_max_integrals)
+  complex*16              :: t1(n_pt_max_integrals), t2(n_pt_max_integrals)
+
+
+  ASSERT (n_pt > 1)
+
+  j = shiftr(n_pt, 1)
+
+  pq_inv   = (0.5d0, 0.d0) / (p + q)
+  p10_1    = (0.5d0, 0.d0) / p
+  p01_1    = (0.5d0, 0.d0) / q
+  p10_2    = (0.5d0, 0.d0) * q /(p * q + p * p)
+  p01_2    = (0.5d0, 0.d0) * p /(q * q + q * p)
+  pq_inv_2 = pq_inv + pq_inv
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: t1, t2, B10, B01, B00
+  ix = a_x + b_x
+  jx = c_x + d_x
+  iy = a_y + b_y
+  jy = c_y + d_y
+  iz = a_z + b_z
+  jz = c_z + d_z
+  sx = ix  +  jx
+  sy = iy  +  jy
+  sz = iz  +  jz
+
+  do i = 1, n_pt
+    B10(i) = p10_1 - gauleg_t2(i, j) * p10_2
+    B01(i) = p01_1 - gauleg_t2(i, j) * p01_2
+    B00(i) = gauleg_t2(i, j) * pq_inv
+  enddo
+
+  if(sx > 0) then
+    call I_x1_new_cosgtos(ix, jx, B10, B01, B00, t1, n_pt)
+  else
+    do i = 1, n_pt
+      t1(i) = (1.d0, 0.d0)
+    enddo
+  endif
+
+  if(sy > 0) then
+    call I_x1_new_cosgtos(iy, jy, B10, B01, B00, t2, n_pt)
+    do i = 1, n_pt
+      t1(i) = t1(i) * t2(i)
+    enddo
+  endif
+
+  if(sz > 0) then
+    call I_x1_new_cosgtos(iz, jz, B10, B01, B00, t2, n_pt)
+    do i = 1, n_pt
+      t1(i) = t1(i) * t2(i)
+    enddo
+  endif
+
+  I_f = (0.d0, 0.d0)
+  do i = 1, n_pt
+    I_f += gauleg_w(i, j) * t1(i)
+  enddo
+
+end subroutine integrale_new_cosgtos
+
+! ---
+
+recursive subroutine I_x1_new_cosgtos(a, c, B_10, B_01, B_00, res, n_pt)
+
+  BEGIN_DOC
+  !  recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)  :: a, c, n_pt
+  complex*16, intent(in)  :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals)
+  complex*16, intent(out) :: res(n_pt_max_integrals)
+
+  integer                 :: i
+  complex*16              :: res2(n_pt_max_integrals)
+
+  if(c < 0) then
+
+    do i = 1, n_pt
+      res(i) = (0.d0, 0.d0)
+    enddo
+
+  else if (a == 0) then
+
+    call I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt)
+
+  else if (a == 1) then
+
+    call I_x2_new_cosgtos(c-1, B_10, B_01, B_00, res, n_pt)
+    do i = 1, n_pt
+      res(i) = dble(c) * B_00(i) * res(i)
+    enddo
+
+  else
+
+    call I_x1_new_cosgtos(a-2, c  , B_10, B_01, B_00, res , n_pt)
+    call I_x1_new_cosgtos(a-1, c-1, B_10, B_01, B_00, res2, n_pt)
+    do i = 1, n_pt
+      res(i) = dble(a-1) * B_10(i) * res(i) + dble(c) * B_00(i) * res2(i)
+    enddo
+
+  endif
+
+end subroutine I_x1_new_cosgtos
+
+! ---
+
+recursive subroutine I_x2_new_cosgtos(c, B_10, B_01, B_00, res, n_pt)
+
+  BEGIN_DOC
+  !  recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)  :: c, n_pt
+  complex*16, intent(in)  :: B_10(n_pt_max_integrals), B_01(n_pt_max_integrals), B_00(n_pt_max_integrals)
+  complex*16, intent(out) :: res(n_pt_max_integrals)
+
+  integer                 :: i
+
+  if(c == 1) then
+
+    do i = 1, n_pt
+      res(i) = (0.d0, 0.d0)
+    enddo
+
+  elseif(c == 0) then
+
+    do i = 1, n_pt
+      res(i) = (1.d0, 0.d0)
+    enddo
+
+  else
+
+    call I_x1_new_cosgtos(0, c-2, B_10, B_01, B_00, res, n_pt)
+    do i = 1, n_pt
+      res(i) = dble(c-1) * B_01(i) * res(i)
+    enddo
+
+  endif
+
+end subroutine I_x2_new_cosgtos
+
+! ---
+
+subroutine give_cpolynom_mult_center_x( P_center, Q_center, a_x, d_x, p, q, n_pt_in &
+                                      , pq_inv, pq_inv_2, p10_1, p01_1, p10_2, p01_2, d, n_pt_out)
+
+  BEGIN_DOC
+  ! subroutine that returns the explicit polynom in term of the "t"
+  ! variable of the following polynoms :
+  !
+  ! $I_{x_1}(a_x,d_x,p,q) \, I_{x_1}(a_y,d_y,p,q) \ I_{x_1}(a_z,d_z,p,q)$
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)  :: n_pt_in, a_x, d_x
+  complex*16, intent(in)  :: P_center, Q_center, p, q, pq_inv, p10_1, p01_1, p10_2, p01_2, pq_inv_2
+  integer,    intent(out) :: n_pt_out
+  complex*16, intent(out) :: d(0:max_dim)
+
+  integer                 :: n_pt1, i
+  complex*16              :: B10(0:2), B01(0:2), B00(0:2), C00(0:2), D00(0:2)
+
+  ASSERT (n_pt_in >= 0)
+
+  B10(0) = p10_1
+  B10(1) = (0.d0, 0.d0)
+  B10(2) = -p10_2
+
+  B01(0) = p01_1
+  B01(1) = (0.d0, 0.d0)
+  B01(2) = -p01_2
+
+  B00(0) = (0.d0, 0.d0)
+  B00(1) = (0.d0, 0.d0)
+  B00(2) = pq_inv
+
+  C00(0) = (0.d0, 0.d0)
+  C00(1) = (0.d0, 0.d0)
+  C00(2) =  -q * (P_center - Q_center) * pq_inv_2
+
+  D00(0) = (0.d0, 0.d0)
+  D00(1) = (0.d0, 0.d0)
+  D00(2) =  -p * (Q_center - P_center) * pq_inv_2
+
+  do i = 0, n_pt_in
+    d(i) = (0.d0, 0.d0)
+  enddo
+
+  n_pt1 = n_pt_in
+
+  !DIR$ FORCEINLINE
+  call I_x1_pol_mult_cosgtos(a_x, d_x, B10, B01, B00, C00, D00, d, n_pt1, n_pt_in)
+  n_pt_out = n_pt1
+
+!  print *, ' '
+!  print *, a_x, d_x
+!  print *, real(B10), real(B01), real(B00), real(C00), real(D00)
+!  print *, n_pt1, real(d(0:n_pt1))
+!  print *, ' '
+
+  if(n_pt1 < 0) then
+    n_pt_out = -1
+    do i = 0, n_pt_in
+      d(i) = (0.d0, 0.d0)
+    enddo
+    return
+  endif
+
+end subroutine give_cpolynom_mult_center_x
+
+! ---
+
+subroutine I_x1_pol_mult_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+
+  BEGIN_DOC
+  ! Recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: n_pt_in, a, c
+  complex*16, intent(in)    :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max_dim)
+
+  if( (c >= 0) .and. (nd >= 0) ) then
+
+    if(a == 1) then
+      call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+    else if(a == 2) then
+      call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+    else if(a > 2) then
+      call I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+    else  ! a == 0
+
+      if(c == 0)then
+        nd = 0
+        d(0) = (1.d0, 0.d0)
+        return
+      endif
+
+      call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+    endif
+
+  else
+
+    nd = -1
+
+  endif
+
+end subroutine I_x1_pol_mult_cosgtos
+
+! ---
+
+recursive subroutine I_x1_pol_mult_recurs_cosgtos(a, c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+
+  BEGIN_DOC
+  ! Recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: n_pt_in, a, c
+  complex*16, intent(in)    :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max_dim)
+
+  integer                   :: nx, ix, iy, ny
+  complex*16                :: X(0:max_dim)
+  complex*16                :: Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
+
+  ASSERT (a > 2)
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    X(ix) = (0.d0, 0.d0)
+  enddo
+
+  nx = 0
+  if(a == 3) then
+    call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+  elseif(a == 4) then
+    call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+  else
+    ASSERT (a >= 5)
+    call I_x1_pol_mult_recurs_cosgtos(a-2, c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+  endif
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, nx
+    X(ix) *= dble(a-1)
+  enddo
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(X, nx, B_10, 2, d, nd)
+  nx = nd
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    X(ix) = (0.d0, 0.d0)
+  enddo
+
+  if(c > 0) then
+
+    if(a == 3) then
+      call I_x1_pol_mult_a2_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+    else
+      ASSERT(a >= 4)
+      call I_x1_pol_mult_recurs_cosgtos(a-1, c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+    endif
+
+    if(c > 1) then
+      !DIR$ LOOP COUNT(8)
+      do ix = 0, nx
+        X(ix) *= dble(c)
+      enddo
+    endif
+    !DIR$ FORCEINLINE
+    call multiply_cpoly(X, nx, B_00, 2, d, nd)
+
+  endif
+
+  ny = 0
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    Y(ix) = (0.d0, 0.d0)
+  enddo
+
+  ASSERT (a > 2)
+
+  if(a == 3) then
+    call I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in)
+  else
+    ASSERT(a >= 4)
+    call I_x1_pol_mult_recurs_cosgtos(a-1, c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in)
+  endif
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(Y, ny, C_00, 2, d, nd)
+
+end subroutine I_x1_pol_mult_recurs_cosgtos
+
+! ---
+
+recursive subroutine I_x1_pol_mult_a1_cosgtos(c,B_10,B_01,B_00,C_00,D_00,d,nd,n_pt_in)
+
+  BEGIN_DOC
+  ! Recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: n_pt_in, c
+  complex*16, intent(in)    :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max_dim)
+
+  integer                   :: nx, ix, iy, ny
+  complex*16                :: X(0:max_dim)
+  complex*16                :: Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
+
+  if( (c < 0) .or. (nd < 0) ) then
+    nd = -1
+    return
+  endif
+
+  nx = nd
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    X(ix) = (0.d0, 0.d0)
+  enddo
+  call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+
+  if(c > 1) then
+    !DIR$ LOOP COUNT(8)
+    do ix = 0, nx
+      X(ix) *= dble(c)
+    enddo
+  endif
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(X, nx, B_00, 2, d, nd)
+
+  ny = 0
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    Y(ix) = (0.d0, 0.d0)
+  enddo
+  call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in)
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(Y, ny, C_00, 2, d, nd)
+
+end subroutine I_x1_pol_mult_a1_cosgtos
+
+! ---
+
+recursive subroutine I_x1_pol_mult_a2_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, n_pt_in)
+
+  BEGIN_DOC
+  ! Recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: n_pt_in, c
+  complex*16, intent(in)    :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max_dim)
+
+  integer                   :: nx, ix, iy, ny
+  complex*16                :: X(0:max_dim)
+  complex*16                :: Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X,Y
+
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    X(ix) = (0.d0, 0.d0)
+  enddo
+
+  nx = 0
+  call I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(X, nx, B_10, 2, d, nd)
+
+  nx = nd
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    X(ix) = (0.d0, 0.d0)
+  enddo
+
+  !DIR$ FORCEINLINE
+  call I_x1_pol_mult_a1_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, X, nx, n_pt_in)
+
+  if (c>1) then
+    !DIR$ LOOP COUNT(8)
+    do ix = 0, nx
+      X(ix) *= dble(c)
+    enddo
+  endif
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(X, nx, B_00, 2, d, nd)
+
+  ny = 0
+  !DIR$ LOOP COUNT(8)
+  do ix = 0, n_pt_in
+    Y(ix) = 0.d0
+  enddo
+  !DIR$ FORCEINLINE
+  call I_x1_pol_mult_a1_cosgtos(c, B_10, B_01, B_00, C_00, D_00, Y, ny, n_pt_in)
+
+  !DIR$ FORCEINLINE
+  call multiply_cpoly(Y, ny, C_00, 2, d, nd)
+
+end subroutine I_x1_pol_mult_a2_cosgtos
+
+! ---
+
+recursive subroutine I_x2_pol_mult_cosgtos(c, B_10, B_01, B_00, C_00, D_00, d, nd, dim)
+
+  BEGIN_DOC
+  ! Recursive function involved in the two-electron integral
+  END_DOC
+
+  implicit none
+  include 'utils/constants.include.F'
+
+  integer,    intent(in)    :: dim, c
+  complex*16, intent(in)    :: B_10(0:2), B_01(0:2), B_00(0:2), C_00(0:2), D_00(0:2)
+  integer,    intent(inout) :: nd
+  complex*16, intent(inout) :: d(0:max_dim)
+
+  integer                   :: i
+  integer                   :: nx, ix, ny
+  complex*16                :: X(0:max_dim), Y(0:max_dim)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
+
+  select case (c)
+
+    case (0)
+      nd = 0
+      d(0) = (1.d0, 0.d0)
+      return
+
+    case (:-1)
+      nd = -1
+      return
+
+    case (1)
+      nd = 2
+      d(0) = D_00(0)
+      d(1) = D_00(1)
+      d(2) = D_00(2)
+      return
+
+    case (2)
+      nd = 2
+      d(0) = B_01(0)
+      d(1) = B_01(1)
+      d(2) = B_01(2)
+
+      ny = 2
+      Y(0) = D_00(0)
+      Y(1) = D_00(1)
+      Y(2) = D_00(2)
+
+      !DIR$ FORCEINLINE
+      call multiply_cpoly(Y, ny, D_00, 2, d, nd)
+      return
+
+    case default
+
+      !DIR$ LOOP COUNT(6)
+      do ix = 0, c+c
+        X(ix) = (0.d0, 0.d0)
+      enddo
+      nx = 0
+      call I_x2_pol_mult_cosgtos(c-2, B_10, B_01, B_00, C_00, D_00, X, nx, dim)
+
+      !DIR$ LOOP COUNT(6)
+      do ix = 0, nx
+        X(ix) *= dble(c-1)
+      enddo
+
+      !DIR$ FORCEINLINE
+      call multiply_cpoly(X, nx, B_01, 2, d, nd)
+
+      ny = 0
+      !DIR$ LOOP COUNT(6)
+      do ix = 0, c+c
+        Y(ix) = 0.d0
+      enddo
+      call I_x2_pol_mult_cosgtos(c-1, B_10, B_01, B_00, C_00, D_00, Y, ny, dim)
+
+      !DIR$ FORCEINLINE
+      call multiply_cpoly(Y, ny, D_00, 2, d, nd)
+
+  end select
+
+end subroutine I_x2_pol_mult_cosgtos
+
+! ---
+
+
diff --git a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
index cc689391..1bed60fe 100644
--- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
+++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
@@ -128,10 +128,10 @@ subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N
 
     if(itermax > 4) then
       itermax = itermax - 1
-    else if (m==1.and.disk_based_davidson) then
-      m = 0
-      disk_based = .True.
-      itermax = 6
+!    else if (m==1.and.disk_based_davidson) then
+!      m = 0
+!      disk_based = .True.
+!      itermax = 6
     else
       nproc_target = nproc_target - 1
     endif
diff --git a/src/davidson_keywords/EZFIO.cfg b/src/davidson_keywords/EZFIO.cfg
new file mode 100644
index 00000000..5df761f3
--- /dev/null
+++ b/src/davidson_keywords/EZFIO.cfg
@@ -0,0 +1,54 @@
+[threshold_davidson]
+type: Threshold
+doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[threshold_nonsym_davidson]
+type: Threshold
+doc: Thresholds of non-symetric Davidson's algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-12
+
+[davidson_sze_max]
+type: Strictly_positive_int
+doc: Number of micro-iterations before re-contracting
+default: 15
+interface: ezfio,provider,ocaml
+
+[state_following]
+type: logical
+doc: If |true|, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[disk_based_davidson]
+type: logical
+doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is availabl
+default: True
+interface: ezfio,provider,ocaml
+
+[n_states_diag]
+type: States_number
+doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag
+default: 4
+interface: ezfio,ocaml
+
+[n_det_max_full]
+type: Det_number_max
+doc: Maximum number of determinants where |H| is fully diagonalized
+interface: ezfio,provider,ocaml
+default: 1000
+
+[threshold_davidson_from_pt2]
+type: logical
+doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2
+interface: ezfio,provider,ocaml
+default: false
+
+[distributed_davidson]
+type: logical
+doc: If |true|, use the distributed algorithm
+default: True
+interface: ezfio,provider,ocaml
+
diff --git a/src/davidson_keywords/NEED b/src/davidson_keywords/NEED
new file mode 100644
index 00000000..5a3182ed
--- /dev/null
+++ b/src/davidson_keywords/NEED
@@ -0,0 +1 @@
+ezfio_files
diff --git a/src/davidson_keywords/README.rst b/src/davidson_keywords/README.rst
new file mode 100644
index 00000000..cdc7cc1f
--- /dev/null
+++ b/src/davidson_keywords/README.rst
@@ -0,0 +1,4 @@
+=================
+davidson_keywords
+=================
+
diff --git a/src/davidson_keywords/input.irp.f b/src/davidson_keywords/input.irp.f
new file mode 100644
index 00000000..4bd79036
--- /dev/null
+++ b/src/davidson_keywords/input.irp.f
@@ -0,0 +1,43 @@
+
+! ---
+
+BEGIN_PROVIDER [ integer, n_states_diag  ]
+  implicit none
+  BEGIN_DOC
+! Number of states to consider during the Davdison diagonalization
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+
+    call ezfio_has_davidson_keywords_n_states_diag(has)
+    if (has) then
+      call ezfio_get_davidson_keywords_n_states_diag(n_states_diag)
+    else
+      print *, 'davidson_keywords/n_states_diag not found in EZFIO file'
+      stop 1
+    endif
+    n_states_diag = max(2,N_states * N_states_diag)
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_states_diag with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+  if (mpi_master) then
+    write(6, *) 'Read  n_states_diag'
+  endif
+
+END_PROVIDER
+
+! ---
diff --git a/src/davidson_keywords/usef.irp.f b/src/davidson_keywords/usef.irp.f
new file mode 100644
index 00000000..fed2ba9b
--- /dev/null
+++ b/src/davidson_keywords/usef.irp.f
@@ -0,0 +1,33 @@
+use bitmasks
+use f77_zmq
+
+
+! ---
+
+BEGIN_PROVIDER [ integer, nthreads_davidson ]
+ implicit none
+ BEGIN_DOC
+ ! Number of threads for Davidson
+ END_DOC
+ nthreads_davidson = nproc
+ character*(32) :: env
+ call getenv('QP_NTHREADS_DAVIDSON',env)
+ if (trim(env) /= '') then
+   read(env,*) nthreads_davidson
+   call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
+ endif
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ]
+ implicit none
+ BEGIN_DOC
+ ! Threshold of Davidson's algorithm, using PT2 as a guide
+ END_DOC
+ threshold_davidson_pt2 = threshold_davidson
+
+END_PROVIDER
+
+! ---
+
diff --git a/src/iterations_tc/EZFIO.cfg b/src/iterations_tc/EZFIO.cfg
new file mode 100644
index 00000000..2a5e94a7
--- /dev/null
+++ b/src/iterations_tc/EZFIO.cfg
@@ -0,0 +1,24 @@
+[n_iter]
+interface: ezfio
+doc: Number of saved iterations
+type:integer
+default: 1
+
+[n_det_iterations]
+interface: ezfio, provider
+doc: Number of determinants at each iteration
+type: integer
+size: (100)
+ 
+[energy_iterations]
+interface: ezfio, provider
+doc: The variational energy at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
+[pt2_iterations]
+interface: ezfio, provider
+doc: The |PT2| correction at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
diff --git a/src/iterations_tc/NEED b/src/iterations_tc/NEED
new file mode 100644
index 00000000..e69de29b
diff --git a/src/iterations_tc/io.irp.f b/src/iterations_tc/io.irp.f
new file mode 100644
index 00000000..821f5e84
--- /dev/null
+++ b/src/iterations_tc/io.irp.f
@@ -0,0 +1,37 @@
+BEGIN_PROVIDER [ integer, n_iter  ]
+  implicit none
+  BEGIN_DOC
+! number of iterations
+  END_DOC
+
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+
+      double precision :: zeros(N_states,100)
+      integer :: izeros(100)
+      zeros = 0.d0
+      izeros = 0
+      call ezfio_set_iterations_n_iter(0)
+      call ezfio_set_iterations_energy_iterations(zeros)
+      call ezfio_set_iterations_pt2_iterations(zeros)
+      call ezfio_set_iterations_n_det_iterations(izeros)
+      n_iter = 1
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_iter with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
diff --git a/src/iterations_tc/iterations.irp.f b/src/iterations_tc/iterations.irp.f
new file mode 100644
index 00000000..2f1cf0c1
--- /dev/null
+++ b/src/iterations_tc/iterations.irp.f
@@ -0,0 +1,43 @@
+BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Extrapolated energy, using E_var = f(PT2) where PT2=0
+ END_DOC
+! integer :: i
+ extrapolated_energy = 0.D0
+END_PROVIDER 
+
+ subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy)
+ implicit none
+ integer, intent(in)  :: Niter
+ double precision, intent(in) :: ept2(Niter),pt1(Niter),extrap_energy(Niter)
+ call extrapolate_data(Niter,ept2,pt1,extrap_energy)
+ end
+
+subroutine save_iterations(e_, pt2_,n_)
+  implicit none
+  BEGIN_DOC
+! Update the energy in the EZFIO file.
+  END_DOC
+  integer, intent(in) :: n_
+  double precision, intent(in) :: e_(N_states), pt2_(N_states)
+  integer :: i
+
+  if (N_iter == 101) then
+    do i=2,N_iter-1
+      energy_iterations(1:N_states,N_iter-1) = energy_iterations(1:N_states,N_iter)
+      pt2_iterations(1:N_states,N_iter-1) = pt2_iterations(1:N_states,N_iter) 
+    enddo
+    N_iter = N_iter-1
+    TOUCH N_iter
+  endif
+
+  energy_iterations(1:N_states,N_iter) = e_(1:N_states)
+     pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states)
+  n_det_iterations(N_iter) = n_
+  call ezfio_set_iterations_N_iter(N_iter)
+  call ezfio_set_iterations_energy_iterations(energy_iterations)
+  call ezfio_set_iterations_pt2_iterations(pt2_iterations)
+  call ezfio_set_iterations_n_det_iterations(n_det_iterations)
+end
+
diff --git a/src/iterations_tc/print_extrapolation.irp.f b/src/iterations_tc/print_extrapolation.irp.f
new file mode 100644
index 00000000..cb46fb67
--- /dev/null
+++ b/src/iterations_tc/print_extrapolation.irp.f
@@ -0,0 +1,46 @@
+subroutine print_extrapolated_energy
+  implicit none
+  BEGIN_DOC
+! Print the extrapolated energy in the output
+  END_DOC
+
+  integer :: i,k
+
+  if (N_iter< 2) then
+    return
+  endif
+  write(*,'(A)') ''
+  write(*,'(A)') 'Extrapolated energies'
+  write(*,'(A)') '------------------------'
+  write(*,'(A)') ''
+
+  print *,  ''
+  print *,  'State ', 1
+  print *,  ''
+  write(*,*)  '=========== ', '==================='
+  write(*,*)  'minimum PT2 ', 'Extrapolated energy'
+  write(*,*)  '=========== ', '==================='
+  do k=2,min(N_iter,8)
+    write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-k), extrapolated_energy(k,1)
+  enddo
+  write(*,*)  '=========== ', '==================='
+
+  do i=2, min(N_states,N_det)
+    print *,  ''
+    print *,  'State ', i
+    print *,  ''
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+    write(*,*)  'minimum PT2 ', 'Extrapolated energy ', '  Excitation (a.u)  ', '  Excitation (eV)  '
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+    do k=2,min(N_iter,8)
+      write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(i,N_iter+1-k), extrapolated_energy(k,i), &
+          extrapolated_energy(k,i) - extrapolated_energy(k,1), &
+          (extrapolated_energy(k,i) - extrapolated_energy(k,1) ) * 27.211396641308d0
+    enddo
+    write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+  enddo
+
+  print *,  ''
+
+end subroutine
+
diff --git a/src/iterations_tc/print_summary.irp.f b/src/iterations_tc/print_summary.irp.f
new file mode 100644
index 00000000..8e6285e2
--- /dev/null
+++ b/src/iterations_tc/print_summary.irp.f
@@ -0,0 +1,104 @@
+subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Print the extrapolated energy in the output
+  END_DOC
+
+  integer, intent(in)            :: n_det_, n_configuration_, n_st
+  double precision, intent(in)   :: e_(n_st), s2_(n_st)
+  type(pt2_type)  , intent(in)   :: pt2_data, pt2_data_err
+  integer                        :: i, k
+  integer                        :: N_states_p
+  character*(9)                  :: pt2_string
+  character*(512)                :: fmt
+
+  if (do_pt2) then
+    pt2_string = '        '
+  else
+    pt2_string = '(approx)'
+  endif
+
+  N_states_p = min(N_det_,n_st)
+
+  print *, ''
+  print '(A,I12)',  'Summary at N_det = ', N_det_
+  print '(A)',      '-----------------------------------'
+  print *, ''
+
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))'
+  write(*,fmt) ('State',k, k=1,N_states_p)
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))'
+  write(*,fmt) '# E          ', e_(1:N_states_p)
+  if (N_states_p > 1) then
+    write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1)
+    write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0
+  endif
+  write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))'
+  write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p)
+  write(*,'(A)') '#'
+  write(*,fmt) '# E+PT2      ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# E+rPT2     ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p)
+  if (N_states_p > 1) then
+    write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p)
+    write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p)
+  endif
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  print *,  ''
+
+  print *,  'N_det             = ', N_det_
+  print *,  'N_states          = ', n_st
+  if (s2_eig) then
+    print *,  'N_cfg             = ', N_configuration_
+    if (only_expected_s2) then
+      print *,  'N_csf             = ', N_csf
+    endif
+  endif
+  print *,  ''
+
+  do k=1, N_states_p
+    print*,'* State ',k
+    print *,  '< S^2 >         = ', s2_(k)
+    print *,  'E               = ', e_(k)
+    print *,  'Variance        = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k)
+    print *,  'PT norm         = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k))
+    print *,  'PT2             = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'rPT2            = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  ''
+  enddo
+
+  print *,  '-----'
+  if(n_st.gt.1)then
+    print *, 'Variational Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i) - e_(1)), &
+        (e_(i) - e_(1)) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), &
+        (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + renormalized perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), &
+        (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0
+    enddo
+  endif
+
+!  call print_energy_components()
+
+end subroutine
+
diff --git a/src/non_h_ints_mu/README.rst b/src/non_h_ints_mu/README.rst
index 6a36bb98..fb1c25b1 100644
--- a/src/non_h_ints_mu/README.rst
+++ b/src/non_h_ints_mu/README.rst
@@ -9,3 +9,7 @@ The two providers are :
 +) ao_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the MO basis. 
 +) mo_non_hermit_term_chemist which returns the non hermitian part of the two-electron TC Hamiltonian on the BI-ORTHO MO basis. 
 
+
+!\sum_mm = 1,3 \sum_R phi_i(R) \phi_k(R) grad_1_u_ij_mu(j,l,R,mm)  grad_1_u_ij_mu(m,n,R,mm)
+!\sum_mm+= 1,3 \sum_R phi_j(R) \phi_l(R) grad_1_u_ij_mu(i,k,R,mm)  grad_1_u_ij_mu(m,n,R,mm)
+!\sum_mm+= 1,3 \sum_R phi_m(R) \phi_n(R) grad_1_u_ij_mu(i,k,R,mm)  grad_1_u_ij_mu(j,l,R,mm)
diff --git a/src/non_h_ints_mu/grad_squared.irp.f b/src/non_h_ints_mu/grad_squared.irp.f
index 4e70bc5c..d625052b 100644
--- a/src/non_h_ints_mu/grad_squared.irp.f
+++ b/src/non_h_ints_mu/grad_squared.irp.f
@@ -155,6 +155,103 @@ END_PROVIDER
 
 ! ---
 
+ BEGIN_PROVIDER [ double precision, grad_1_squared_u_ij_mu_new, (n_points_final_grid, ao_num, ao_num)]
+ implicit none
+ integer :: ipoint,i,j,m,igauss
+ BEGIN_DOC
+ ! grad_1_squared_u_ij_mu(j,i,ipoint) = -1/2 \int dr2 phi_j(r2) phi_i(r2) |\grad_r1 u(r1,r2,\mu)|^2 
+ ! |\grad_r1 u(r1,r2,\mu)|^2 = 1/4 * (1 - erf(mu*r12))^2 
+ ! ! (1 - erf(mu*r12))^2 = \sum_i coef_gauss_1_erf_x_2(i) * exp(-expo_gauss_1_erf_x_2(i) * r12^2)
+ END_DOC
+  include 'constants.include.F'
+ double precision :: r(3),delta,coef
+ double precision :: overlap_gauss_r12_ao,time0,time1
+ integer :: num_a,num_b,power_A(3), power_B(3),l,k
+ double precision :: A_center(3), B_center(3),overlap_gauss_r12,alpha,beta,analytical_j
+ double precision  :: A_new(0:max_dim,3)! new polynom 
+ double precision  :: A_center_new(3)   ! new center
+ integer           :: iorder_a_new(3)   ! i_order(i) = order of the new polynom ==> should be equal to power_A
+ double precision  :: alpha_new         ! new exponent
+ double precision  :: fact_a_new, coef_i, coef_j, k_ab,center_new(3),p_new,c_tmp,coef_last        ! constant factor
+ double precision  :: coefxy, coefx, coefy, coefz,coefxyz
+ integer           :: d(3),lx,ly,lz,iorder_tmp(3),dim1
+ double precision  :: overlap,overlap_x,overlap_y,overlap_z,thr
+ dim1=100
+ thr = 0.d0
+ print*,'providing grad_1_squared_u_ij_mu_new ...'
+ grad_1_squared_u_ij_mu_new = 0.d0
+ call wall_time(time0)
+ !TODO : strong optmization : write the loops in a different way
+ !     : for each couple of AO, the gaussian product are done once for all 
+ d = 0
+ do i = 1, ao_num
+  do j = 1, ao_num
+   ! \int dr2 phi_j(r2) phi_i(r2) (1 - erf(mu*r12))^2 
+   ! = \sum_i coef_gauss_1_erf_x_2(i) \int dr2 phi_j(r2) phi_i(r2) exp(-expo_gauss_1_erf_x_2(i) * (r_1 - r_2)^2)
+   if(ao_overlap_abs(j,i).lt.1.d-12)then
+    cycle
+   endif
+   num_A = ao_nucl(i)
+   power_A(1:3)= ao_power(i,1:3)
+   A_center(1:3) = nucl_coord(num_A,1:3)
+   num_B = ao_nucl(j)
+   power_B(1:3)= ao_power(j,1:3)
+   B_center(1:3) = nucl_coord(num_B,1:3)
+   do l=1,ao_prim_num(i)
+    coef_i = ao_coef_normalized_ordered_transp(l,i)  
+    alpha = ao_expo_ordered_transp(l,i)     
+    do k=1,ao_prim_num(j)
+     beta = ao_expo_ordered_transp(k,j)     
+     coef_j = ao_coef_normalized_ordered_transp(k,j) 
+
+     ! New gaussian/polynom defined by :: new pol new center new expo   cst fact new order                                
+     ! from gaussian_A * gaussian_B
+     call give_explicit_poly_and_gaussian(A_new , A_center_new , alpha_new, fact_a_new , iorder_a_new , & 
+                                      beta,alpha,power_B,power_A,B_center,A_center,n_pt_max_integrals)
+     c_tmp = coef_i*coef_j*fact_a_new
+     if(dabs(c_tmp).lt.thr)cycle
+     do ipoint = 1, n_points_final_grid
+      r(1) = final_grid_points(1,ipoint)
+      r(2) = final_grid_points(2,ipoint)
+      r(3) = final_grid_points(3,ipoint)
+      do igauss = 1, n_max_fit_slat
+       delta = expo_gauss_1_erf_x_2(igauss)
+       coef  = coef_gauss_1_erf_x_2(igauss)
+       coef_last = c_tmp * coef
+       if(dabs(coef_last).lt.thr)cycle
+       do lx = 0, iorder_a_new(1)
+        coefx = A_new(lx,1)
+        coefx *= coef_last 
+!        if(dabs(coefx).lt.thr)cycle
+        iorder_tmp(1) = lx
+        do ly = 0, iorder_a_new(2)
+         coefy = A_new(ly,2)
+         coefxy= coefx*coefy
+!         if(dabs(coefxy).lt.thr)cycle
+         iorder_tmp(2) = ly
+         do lz = 0, iorder_a_new(3)
+          coefz = A_new(lz,3)
+          coefxyz = coefz * coefxy
+!          if(dabs(coefxyz).lt.thr)cycle
+          iorder_tmp(3) = lz
+!          call gaussian_product(alpha_new,A_center_new,delta,r,k_ab,p_new,center_new)
+!          if(dabs(coef_last*k_ab).lt.thr)cycle
+          call overlap_gaussian_xyz(A_center_new,r,alpha_new,delta,iorder_tmp,d,overlap_x,overlap_y,overlap_z,overlap,dim1)
+          grad_1_squared_u_ij_mu_new(ipoint,j,i) += -0.25 * coefxyz * overlap
+         enddo ! igauss
+        enddo ! ipoint 
+       enddo ! lz
+      enddo ! ly
+     enddo ! lx 
+    enddo ! k
+   enddo ! l
+  enddo ! j
+ enddo ! i
+ call wall_time(time1)
+ print*,'Wall time for grad_1_squared_u_ij_mu_new = ',time1 - time0
+ END_PROVIDER 
+
+
 BEGIN_PROVIDER [double precision, tc_grad_square_ao, (ao_num, ao_num, ao_num, ao_num)]
 
   BEGIN_DOC
diff --git a/src/non_h_ints_mu/test_non_h_ints.irp.f b/src/non_h_ints_mu/test_non_h_ints.irp.f
deleted file mode 100644
index c535d0c5..00000000
--- a/src/non_h_ints_mu/test_non_h_ints.irp.f
+++ /dev/null
@@ -1,102 +0,0 @@
-program test_non_h
- implicit none
-  my_grid_becke  = .True.
-  my_n_pt_r_grid = 50
-  my_n_pt_a_grid = 74
-!  my_n_pt_r_grid = 10 ! small grid for quick debug
-!  my_n_pt_a_grid = 26 ! small grid for quick debug
-  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-!call routine_grad_squared
- call routine_fit
-end
-
-subroutine routine_lapl_grad
- implicit none
- integer :: i,j,k,l
- double precision :: grad_lapl, get_ao_tc_sym_two_e_pot,new,accu,contrib
- double precision :: ao_two_e_integral_erf,get_ao_two_e_integral,count_n,accu_relat
-! !!!!!!!!!!!!!!!!!!!!! WARNING
-! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(1:n_max_fit_slat) = 0. to cancel (1-erf(mu*r12))^2
- accu = 0.d0
- accu_relat = 0.d0
- count_n = 0.d0
- do i = 1, ao_num
-  do j = 1, ao_num
-   do k = 1, ao_num
-    do l = 1, ao_num
-     grad_lapl  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
-     grad_lapl += ao_two_e_integral_erf(i, k, j, l)                        ! erf(mu r12)/r12    : comes from Lapl
-     grad_lapl += ao_non_hermit_term_chemist(k,i,l,j)                      ! \grad u(r12) . grad
-     new        = tc_grad_and_lapl_ao(k,i,l,j)
-     new       += get_ao_two_e_integral(i,j,k,l,ao_integrals_map)
-     contrib    = dabs(new - grad_lapl)
-     if(dabs(grad_lapl).gt.1.d-12)then
-      count_n += 1.d0
-      accu_relat += 2.0d0 * contrib/dabs(grad_lapl+new)
-     endif
-     if(contrib.gt.1.d-10)then
-      print*,i,j,k,l
-      print*,grad_lapl,new,contrib
-      print*,2.0d0*contrib/dabs(grad_lapl+new+1.d-12)
-     endif 
-     accu += contrib
-    enddo
-   enddo
-  enddo
- enddo
- print*,'accu      = ',accu/count_n
- print*,'accu/rel  = ',accu_relat/count_n
-
-end
-
-subroutine routine_grad_squared
- implicit none
- integer :: i,j,k,l
- double precision :: grad_squared, get_ao_tc_sym_two_e_pot,new,accu,contrib
- double precision :: count_n,accu_relat
-! !!!!!!!!!!!!!!!!!!!!! WARNING
-! THIS ROUTINE MAKES SENSE ONLY IF HAND MODIFIED coef_gauss_eff_pot(n_max_fit_slat:n_max_fit_slat+1) = 0. to cancel exp(-'mu*r12)^2)
- accu = 0.d0
- accu_relat = 0.d0
- count_n = 0.d0
- do i = 1, ao_num
-  do j = 1, ao_num
-   do k = 1, ao_num
-    do l = 1, ao_num
-     grad_squared  = get_ao_tc_sym_two_e_pot(i,j,k,l,ao_tc_sym_two_e_pot_map) ! pure gaussian part : comes from Lapl
-     new        = tc_grad_square_ao(k,i,l,j)
-     contrib    = dabs(new - grad_squared)
-     if(dabs(grad_squared).gt.1.d-12)then
-      count_n += 1.d0
-      accu_relat += 2.0d0 * contrib/dabs(grad_squared+new)
-     endif
-     if(contrib.gt.1.d-10)then
-      print*,i,j,k,l
-      print*,grad_squared,new,contrib
-      print*,2.0d0*contrib/dabs(grad_squared+new+1.d-12)
-     endif 
-     accu += contrib
-    enddo
-   enddo
-  enddo
- enddo
- print*,'accu      = ',accu/count_n
- print*,'accu/rel  = ',accu_relat/count_n
-
-end
-
-subroutine routine_fit
- implicit none
- integer :: i,nx
- double precision :: dx,xmax,x,j_mu,j_mu_F_x_j,j_mu_fit_gauss
- nx = 500
- xmax = 5.d0
- dx = xmax/dble(nx)
- x = 0.d0
- print*,'coucou',mu_erf
- do i = 1, nx
-  write(33,'(100(F16.10,X))') x,j_mu(x),j_mu_F_x_j(x),j_mu_fit_gauss(x)
-  x += dx
- enddo
-
-end
diff --git a/src/non_hermit_dav/NEED b/src/non_hermit_dav/NEED
new file mode 100644
index 00000000..9487075c
--- /dev/null
+++ b/src/non_hermit_dav/NEED
@@ -0,0 +1 @@
+utils
diff --git a/src/non_hermit_dav/biorthog.irp.f b/src/non_hermit_dav/biorthog.irp.f
new file mode 100644
index 00000000..d6add005
--- /dev/null
+++ b/src/non_hermit_dav/biorthog.irp.f
@@ -0,0 +1,1088 @@
+subroutine non_hrmt_diag_split_degen(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: r,thr
+  integer,          allocatable :: list_good(:), iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+
+
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  shift = 1.d-15
+  shift_current = shift
+  iteration = 1 
+  logical :: good_ortho
+  good_ortho = .False.
+  do while(n_real_eigv.ne.n.or. .not.good_ortho)
+   if(shift.gt.1.d-3)then
+    print*,'shift > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   print*,'***** iteration = ',iteration
+   print*,'shift = ',shift
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A_save
+   do i = 1, n
+    do j = 1, n
+     if(dabs(Aw(j,i)).lt.shift)then
+      Aw(j,i) = 0.d0
+     endif
+    enddo
+   enddo
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   allocate(im_part(n),iorder(n))
+   do i = 1, n
+    im_part(i) = -dabs(WI(i))
+    iorder(i) = i
+   enddo
+   call dsort(im_part, iorder, n)
+
+   shift_current = max(10.d0 * dabs(im_part(1)),shift)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+   Aw = A_save
+   call split_matrix_degen(Aw,n,shift_current)
+   deallocate( im_part, iorder )
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   ! You track the real eigenvalues 
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec_tmp(j,i) = VR(j,list_good(iorder(i)))
+       leigvec_tmp(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+
+   if(n_real_eigv == n)then
+    allocate(S(n,n))
+    call check_bi_ortho(reigvec_tmp,leigvec_tmp,n,S,accu_nd)
+    print*,'accu_nd = ',accu_nd
+    double precision :: accu_nd
+    good_ortho = accu_nd .lt. 1.d-10
+    deallocate(S)
+   endif
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+   iteration += 1
+  enddo
+  do i = 1, n
+   do j = 1, n
+    reigvec(iorder_origin(j),i) = reigvec_tmp(j,i)
+    leigvec(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+end subroutine non_hrmt_diag_split_degen
+
+! ---
+
+subroutine non_hrmt_real_diag_new(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: shift,shift_current
+  double precision              :: r,thr
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: im_part(:)
+
+
+  print*,'Computing the left/right eigenvectors ...'
+
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  shift = 1.d-10
+  do while(n_real_eigv.ne.n.or.shift.gt.1.d-3)
+   allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+   Aw = A
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   allocate(im_part(n), iorder(n))
+   do i = 1, n
+    im_part(i) = -dabs(WI(i))
+    iorder(i) = i
+   enddo
+   shift_current = max(10.d0 * dabs(im_part(1)),shift)
+   print*,'adding random number of magnitude ',shift_current
+   Aw = A
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     Aw(i,i) += shift_current * r
+   enddo
+   deallocate( im_part, iorder )
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+ 
+   ! You track the real eigenvalues 
+   thr = 1.d-10
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+     else
+       print*,'Found an imaginary component to eigenvalue'
+       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+ 
+   allocate( list_good(n_good), iorder(n_good) )
+   n_good = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.thr)then
+       n_good += 1
+       list_good(n_good) = i
+       eigval(n_good) = WR(i)
+     endif
+   enddo
+ 
+   deallocate( WR, WI )
+ 
+   n_real_eigv = n_good 
+   do i = 1, n_good
+     iorder(i) = i
+   enddo
+ 
+   ! You sort the real eigenvalues 
+   call dsort(eigval, iorder, n_good)
+ 
+   reigvec(:,:) = 0.d0 
+   leigvec(:,:) = 0.d0 
+   do i = 1, n_real_eigv
+     do j = 1, n
+       reigvec(j,i) = VR(j,list_good(iorder(i)))
+       leigvec(j,i) = Vl(j,list_good(iorder(i)))
+     enddo
+   enddo
+ 
+   deallocate( list_good, iorder )
+   deallocate( VL, VR, Aw)
+   shift *= 10.d0
+  enddo
+  if(shift.gt.1.d-3)then
+   print*,'shift > 1.d-3 !!'
+   print*,'Your matrix intrinsically contains complex eigenvalues'
+  endif
+
+end subroutine non_hrmt_real_diag_new
+
+! ---
+
+subroutine non_hrmt_bieig(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr, thr_cut, thr_diag, thr_norm
+  double precision              :: accu_d, accu_nd, thr_d, thr_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, ' '
+  print *, ' Computing the left/right eigenvectors ...'
+  print *, ' '
+
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n) ) 
+  
+  print *, ' fock matrix'
+  do i = 1, n
+    write(*, '(1000(F16.10,X))') A(i,:)
+  enddo
+
+  !thr_cut = 1.d-15
+  !call cancel_small_elmts(A, n, thr_cut)
+
+  !call lapack_diag_non_sym_right(n, A, WR, WI, VR)
+  call lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+  !call lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  print *, ' '
+  print *, ' eigenvalues'
+  do i = 1, n
+    write(*, '(1000(F16.10,X))') WR(i), WI(i)
+  enddo
+  !print *, ' right eigenvect bef' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VR(:,i)
+  !enddo
+  !print *, ' left eigenvect bef'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') VL(:,i)
+  !enddo
+
+  thr_diag = 1d-10
+  thr_norm = 1d+10
+  call check_EIGVEC(n, n, A, WR, VL, VR, thr_diag, thr_norm, .false.)
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  n_good = 0
+  thr    = 1.d-5
+  do i = 1, n
+    if(dabs(WI(i)) .lt. thr) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+      stop
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  ASSERT(n==n_real_eigv)
+
+  !print *, ' eigenvalues'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') eigval(i)
+  !enddo
+  !print *, ' right eigenvect aft ord' 
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') reigvec(:,i)
+  !enddo
+  !print *, ' left eigenvect aft ord'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') leigvec(:,i)
+  !enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  thr_d  = 1d-8
+  thr_nd = 1d-8
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+  call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .lt. thr_d) ) then
+
+    print *, ' lapack vectors are normalized and bi-orthogonalized'
+    deallocate(S)
+    return
+
+  elseif( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then
+
+    print *, ' lapack vectors are not normalized but bi-orthogonalized'
+    call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.)
+
+    thr_diag = 1d-10
+    thr_norm = 1d+10
+    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+    deallocate(S)
+    return
+
+  else
+
+    print *, ' lapack vectors are not normalized neither bi-orthogonalized'
+
+    ! ---
+
+    !call impose_orthog_degen_eigvec(n, eigval, reigvec)
+    !call impose_orthog_degen_eigvec(n, eigval, leigvec)
+
+    call impose_biorthog_degen_eigvec(n, eigval, leigvec, reigvec)
+
+
+    !call impose_orthog_biorthog_degen_eigvec(n, eigval, leigvec, reigvec)
+
+    !call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, ao_overlap, leigvec, reigvec)
+
+    ! ---
+
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+    if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n_real_eigv)) .gt. thr_d) ) then
+      call check_biorthog_binormalize(n, n_real_eigv, leigvec, reigvec, .true.)
+    endif
+    call check_biorthog(n, n_real_eigv, leigvec, reigvec, accu_d, accu_nd, S, .true.)
+
+    !call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
+    !call impose_biorthog_lu(n, n_real_eigv, leigvec, reigvec)
+
+    ! ---
+
+    thr_diag = 1d-10
+    thr_norm = 1d+10
+    call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, thr_diag, thr_norm, .true.)
+
+    deallocate(S)
+
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig
+
+! ---
+
+subroutine non_hrmt_bieig_random_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+
+  Aw(:,:) = A(:,:)
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+
+  thr    = 1.d-12
+  double precision, allocatable :: im_part(:)
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  if(n_good .lt. n)then
+   print*,'Removing degeneracies to remove imaginary parts'
+   allocate(im_part(n),iorder(n))
+   r = 0.d0
+   do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+   enddo
+   call dsort(im_part,iorder,n) 
+   thr = 10.d0 * dabs(im_part(1))
+   print*,'adding random numbers on the diagonal of magnitude ',thr
+   Aw(:,:) = A(:,:)
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     print*,'r = ',r*thr
+     Aw(i,i) += thr * r
+   enddo
+   print*,'Rediagonalizing the matrix with random numbers'
+   call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+   deallocate(im_part,iorder)
+  endif
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  n_good = 0
+  thr    = 1.d-5
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+    else
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+  print*,'n_good = ',n_good
+  allocate( list_good(n_good), iorder(n_good) )
+
+  n_good = 0
+  do i = 1, n
+    if( dabs(WI(i)).lt.thr ) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+
+  deallocate( WR, WI )
+
+  n_real_eigv = n_good 
+  do i = 1, n_good
+    iorder(i) = i
+  enddo
+  call dsort(eigval, iorder, n_good)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = VL(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+  deallocate( list_good, iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if(accu_nd .lt. 1d-8) then
+    ! L x R is already bi-orthogonal
+
+    print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    print *, ' L & T bi-orthogonality: not imposed yet'
+    print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n_real_eigv, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end 
+
+! ---
+
+subroutine non_hrmt_real_im(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_bad
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), iorder(n))
+
+  Aw(:,:) = A(:,:)
+   do i = 1, n
+     call RANDOM_NUMBER(r)
+     Aw(i,i) += 10.d-10* r
+   enddo
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  i = 1
+  thr    = 1.d-15
+  n_real_eigv = 0
+  do while (i.le.n) 
+!    print*,i,dabs(WI(i))
+    if( dabs(WI(i)).gt.thr ) then
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) , Im(i)  ', WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+      print*, 'Re(i+1),Im(i+1)',WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+    else  
+      n_real_eigv += 1
+      iorder(i) = i
+      eigval(i) = WR(i)
+      i+=1
+    endif
+  enddo
+  call dsort(eigval, iorder, n)
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+end subroutine non_hrmt_real_im
+
+! ---
+
+subroutine non_hrmt_generalized_real_im(n, A, B, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the EIGENVALUES sorted the REAL part and corresponding LEFT/RIGHT eigenvetors 
+  ! for A R = lambda B R and A^\dagger L = lambda B^\dagger L
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n),B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_bad
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:),Bw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), beta(:)
+  double precision, allocatable :: S(:,:)
+  double precision :: r
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n),iorder(n),beta(n))
+
+  Aw(:,:) = A(:,:)
+  Bw(:,:) = B(:,:)
+  call lapack_diag_general_non_sym(n,Aw,Bw,WR,beta,WI,VL,VR)
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  i = 1
+  thr    = 1.d-10
+  n_real_eigv = 0
+  do while (i.le.n) 
+    if( dabs(WI(i)).gt.thr ) then
+      print*, 'Found an imaginary component to eigenvalue on i = ', i
+      print*, 'Re(i) , Im(i)  ', WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+      print*, 'Re(i+1),Im(i+1)',WR(i), WI(i)
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    else  
+      n_real_eigv += 1
+      iorder(i) = i
+      eigval(i) = WR(i)/(beta(i) + 1.d-10)
+      i+=1
+    endif
+  enddo
+  call dsort(eigval, iorder, n)
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( iorder )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0          &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+end subroutine non_hrmt_generalized_real_im
+
+! ---
+
+subroutine non_hrmt_bieig_fullvect(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! 
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  double precision              :: thr
+  double precision              :: accu_nd
+
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:)
+  double precision, allocatable :: S(:,:)
+  double precision, allocatable :: eigval_sorted(:)
+
+
+  ! -------------------------------------------------------------------------------------
+  !
+
+  print *, 'Computing the left/right eigenvectors ...'
+
+  allocate( WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n) )
+  Aw(:,:) = A(:,:)
+
+  call lapack_diag_non_sym_new(n, Aw, WR, WI, VL, VR)
+
+  deallocate( Aw )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                  track & sort the real eigenvalues 
+
+  allocate( eigval_sorted(n), iorder(n) )
+
+  n_good = 0
+  thr    = 1.d-10
+
+  do i = 1, n
+
+    iorder(i) = i
+    eigval_sorted(i) = WR(i)
+
+    if(dabs(WI(i)) .gt. thr) then
+      print*, ' Found an imaginary component to eigenvalue on i = ', i
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    else
+      n_good += 1
+    endif
+
+  enddo
+
+  n_real_eigv = n_good 
+
+  call dsort(eigval_sorted, iorder, n)
+      
+  reigvec(:,:) = 0.d0 
+  leigvec(:,:) = 0.d0 
+  do i = 1, n
+    eigval(i) = WR(i)
+    do j = 1, n
+      reigvec(j,i) = VR(j,iorder(i))
+      leigvec(j,i) = VL(j,iorder(i))
+    enddo
+  enddo
+
+  deallocate( eigval_sorted, iorder )
+  deallocate( WR, WI )
+  deallocate( VL, VR )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               check bi-orthogonality
+
+  allocate( S(n,n) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  if( accu_nd .lt. 1d-8 ) then
+    ! L x R is already bi-orthogonal
+
+    !print *, ' L & T bi-orthogonality: ok'
+    deallocate( S )
+    return
+
+  else
+    ! impose bi-orthogonality 
+
+    !print *, ' L & T bi-orthogonality: not imposed yet'
+    !print *, ' accu_nd = ', accu_nd
+    call impose_biorthog_qr(n, n, leigvec, reigvec)
+    deallocate( S )
+  
+  endif
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  return
+
+end subroutine non_hrmt_bieig_fullvect
+
+! ---
+
+
+subroutine split_matrix_degen(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2
+ !
+ ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS
+ END_DOC
+ double precision,intent(inout) :: Aw(n,n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer :: i,j,n_degen
+ logical :: keep_on
+ i=1
+ do while(i.lt.n)
+  if(dabs(Aw(i,i)-Aw(i+1,i+1)).lt.shift)then
+   j=1
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i,i)-Aw(i+j,i+j)).lt.shift)then
+     j+=1
+    else
+     keep_on=.False.
+     exit
+    endif
+   enddo
+   n_degen = j
+   j=0
+   keep_on = .True.
+   do while(keep_on)
+    if(i+j+1.gt.n)then
+     keep_on = .False.
+     exit
+    endif
+    if(dabs(Aw(i+j,i+j)-Aw(i+j+1,i+j+1)).lt.shift)then
+     Aw(i+j,i+j) += (j-n_degen/2) * shift
+     j+=1
+    else 
+     keep_on = .False.
+     exit
+    endif
+   enddo
+   Aw(i+n_degen-1,i+n_degen-1) += (n_degen-1-n_degen/2) * shift
+   i+=n_degen
+  else 
+   i+=1
+  endif
+ enddo
+
+end
+
+subroutine cancel_small_elmts(aw,n,shift)
+ implicit none
+ BEGIN_DOC
+ ! subroutines that splits the degeneracies of a matrix by adding a splitting of magnitude thr * n_degen/2
+ !
+ ! WARNING !! THE MATRIX IS ASSUMED TO BE PASSED WITH INCREASING DIAGONAL ELEMENTS
+ END_DOC
+ double precision,intent(inout) :: Aw(n,n)
+ double precision,intent(in)    :: shift
+ integer, intent(in) :: n
+ integer :: i,j
+ do i = 1, n
+  do j = 1, n
+   if(dabs(Aw(j,i)).lt.shift)then
+    Aw(j,i) = 0.d0
+   endif
+  enddo
+ enddo
+end
+
+subroutine check_bi_ortho(reigvec,leigvec,n,S,accu_nd)
+ implicit none
+ integer, intent(in) :: n
+ double precision,intent(in) :: reigvec(n,n),leigvec(n,n)
+ double precision, intent(out) :: S(n,n),accu_nd
+ BEGIN_DOC
+! retunrs the overlap matrix S = Leigvec^T Reigvec 
+!
+! and the square root of the sum of the squared off-diagonal elements of S
+ END_DOC
+ integer :: i,j
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n, n, n, 1.d0 &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1)  &
+            , 0.d0, S, size(S, 1) )
+  accu_nd = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i.ne.j) then
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+end
diff --git a/src/non_hermit_dav/gram_schmit.irp.f b/src/non_hermit_dav/gram_schmit.irp.f
new file mode 100644
index 00000000..520661b8
--- /dev/null
+++ b/src/non_hermit_dav/gram_schmit.irp.f
@@ -0,0 +1,56 @@
+subroutine bi_ortho_gram_schmidt(wi,vi,n,ni,wk,wk_schmidt)
+ implicit none
+ BEGIN_DOC
+! you enter with a set of "ni" BI-ORTHONORMAL vectors of length "n" 
+!
+! vi(j,i) = <j|vi>, wi(j,i) = <j|wi>, <vi|wj> = delta_{ij} S_ii, S_ii =<vi|wi>
+!
+! and a vector vk(j) = <j|vk>
+!
+! you go out with a vector vk_schmidt(j) = <j|vk_schmidt> 
+!
+! which is Gram-Schmidt orthonormalized with respect to the "vi"
+!
+! <vi|wk_schmidt> = 0 
+!
+! |wk_schmidt> = |wk> - \sum_{i=1}^ni (<vi|wk>/<vi|wi>) |wi> 
+!
+! according to Eq. (5), (6) of Computers Structures, Vol 56, No. 4, pp 605-613, 1995
+!
+! https://doi.org/10.1016/0045-7949(94)00565-K
+ END_DOC
+ integer, intent(in) :: n,ni
+ double precision, intent(in) :: wi(n,ni),vi(n,ni),wk(n)
+ double precision, intent(out):: wk_schmidt(n)
+ double precision :: vi_wk,u_dot_v,tmp,u_dot_u
+ double precision, allocatable :: sii(:)
+ integer :: i,j
+ allocate( sii(ni) )
+ wk_schmidt = wk
+ do i = 1, ni
+  sii(i) = u_dot_v(vi(1,i),wi(1,i),n)
+ enddo
+! do i = 1, n
+!  print*,i,'wk',wk(i)
+! enddo
+! print*,''
+! print*,''
+ do i = 1, ni
+!  print*,'i',i
+  ! Gram-Schmidt 
+  vi_wk = u_dot_v(vi(1,i),wk,n)
+  vi_wk = vi_wk / sii(i)
+!  print*,''
+  do j = 1, n
+!   print*,j,vi_wk,wi(j,i)
+   wk_schmidt(j) -= vi_wk * wi(j,i)
+  enddo
+ enddo
+ tmp = u_dot_u(wk_schmidt,n)
+ tmp = 1.d0/dsqrt(tmp)
+ wk_schmidt = tmp * wk_schmidt
+! do j = 1, n
+!  print*,j,'wk_scc',wk_schmidt(j)
+! enddo
+! pause
+end
diff --git a/src/non_hermit_dav/htilde_mat.irp.f b/src/non_hermit_dav/htilde_mat.irp.f
new file mode 100644
index 00000000..6d5101ac
--- /dev/null
+++ b/src/non_hermit_dav/htilde_mat.irp.f
@@ -0,0 +1,93 @@
+BEGIN_PROVIDER [ integer, n_mat]
+ implicit none
+ n_mat = 2
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, h_non_hermit, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, h_non_hermit_transp, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, reigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, leigvec_ht, (n_mat, n_mat)]
+&BEGIN_PROVIDER [ double precision, eigval_ht, (n_mat)]
+&BEGIN_PROVIDER [ integer, n_real_ht, (n_mat)]
+ implicit none
+ integer :: i,j
+ do i = 1, n_mat
+  read(33,*)h_non_hermit(i,1:n_mat)
+ enddo
+ print*,''
+ print*,'H_mat '
+ print*,''
+ do i = 1, n_mat
+  write(*,'(1000(F16.10,X))')h_non_hermit(i,:)
+ enddo
+ do i = 1, n_mat
+  do j = 1, n_mat
+   h_non_hermit_transp(j,i) = h_non_hermit(i,j)
+  enddo
+ enddo
+ call non_hrmt_real_diag(n_mat,h_non_hermit,reigvec_ht,leigvec_ht,n_real_ht,eigval_ht)
+
+
+END_PROVIDER 
+
+
+subroutine hcalc_r_tmp(v,u,N_st,sze) ! v = H u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H
+  !
+  ! Here, it is done with the Hamiltonian matrix 
+  !
+  ! on the set of determinants of psi_det 
+  !
+  ! Computes $v = H | u \rangle$ 
+  !
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: i,j,istate
+  v = 0.d0
+  do istate = 1, N_st
+   do j = 1, sze
+    do i = 1, sze
+      v(i,istate) += h_non_hermit(i,j) * u(j,istate)
+!      print*,i,j,h_non_hermit(i,j),u(j,istate)
+    enddo
+   enddo
+  enddo
+  print*,'HU'
+  do i = 1, sze
+   print*,v(i,1)
+  enddo
+end
+
+subroutine hcalc_l_tmp(v,u,N_st,sze) ! v = H^\dagger u
+  implicit none
+  BEGIN_DOC
+  ! Template of routine for the application of H
+  !
+  ! Here, it is done with the Hamiltonian matrix 
+  !
+  ! on the set of determinants of psi_det 
+  !
+  ! Computes $v = H | u \rangle$ 
+  !
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: i,j,istate
+  v = 0.d0
+  do istate = 1, N_st
+   do j = 1, sze
+    do i = 1, sze
+      v(i,istate) += h_non_hermit_transp(i,j) * u(j,istate)
+    enddo
+   enddo
+  enddo
+  print*,'HU'
+  do i = 1, sze
+   print*,v(i,1)
+  enddo
+end
diff --git a/src/non_hermit_dav/lapack_diag_non_hermit.irp.f b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
new file mode 100644
index 00000000..5bd227c6
--- /dev/null
+++ b/src/non_hermit_dav/lapack_diag_non_hermit.irp.f
@@ -0,0 +1,2391 @@
+subroutine lapack_diag_non_sym(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  ! The real part of the matrix A can be written as A = VR D VL^T
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0)then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK)
+
+end subroutine lapack_diag_non_sym
+
+
+subroutine non_sym_diag_inv_right(n,A,leigvec,reigvec,n_real_eigv,eigval)
+ implicit none
+ BEGIN_DOC
+! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+!
+! of a non hermitian matrix A(n,n)
+!
+! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+ END_DOC
+ integer, intent(in) :: n
+ double precision, intent(in) :: A(n,n)
+ double precision, intent(out) :: reigvec(n,n),leigvec(n,n),eigval(n)
+ double precision, allocatable :: Aw(:,:)
+ integer, intent(out) :: n_real_eigv
+ print*,'Computing the left/right eigenvectors ...'
+ character*1 :: JOBVL,JOBVR
+ JOBVL = "V" ! computes the left  eigenvectors 
+ JOBVR = "V" ! computes the right eigenvectors 
+ double precision, allocatable :: WR(:),WI(:),Vl(:,:),VR(:,:),S(:,:),inv_reigvec(:,:)
+ integer :: i,j
+ integer :: n_good
+ integer, allocatable :: list_good(:), iorder(:)
+ double precision :: thr
+ thr = 1.d-10
+ ! Eigvalue(n) = WR(n) + i * WI(n)
+ allocate(WR(n),WI(n),VL(n,n),VR(n,n),Aw(n,n))
+ Aw = A
+ do i = 1, n
+  do j = i+1, n
+   if(dabs(Aw(j,j)-Aw(i,i)).lt.thr)then
+     Aw(j,j)+= thr
+     Aw(i,i)-= thr
+!    if(Aw(j,i) * A(i,j) .lt.0d0  )then
+!     if(dabs(Aw(j,i) * A(i,j)).lt.thr**(1.5d0))then
+!      print*,Aw(j,j),Aw(i,i)
+!      print*,Aw(j,i) , A(i,j)
+      Aw(j,i) = 0.d0
+      Aw(i,j) = Aw(j,i)
+!     endif
+!    endif
+   endif
+  enddo
+ enddo
+ call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+ ! You track the real eigenvalues 
+ n_good = 0
+! do i = 1, n
+!  write(*,'(100(F16.12,X))')A(:,i)
+! enddo
+ do i = 1, n
+  print*,'Im part of lambda = ',dabs(WI(i))
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+  else
+   print*,'Found an imaginary component to eigenvalue'
+   print*,'Re(i) + Im(i)',WR(i),WI(i)
+   write(*,'(100(F10.5,X))')VR(:,i)
+   write(*,'(100(F10.5,X))')VR(:,i+1)
+   write(*,'(100(F10.5,X))')VL(:,i)
+   write(*,'(100(F10.5,X))')VL(:,i+1)
+  endif
+ enddo
+ allocate(list_good(n_good),iorder(n_good))
+ n_good = 0
+ do i = 1, n
+  if(dabs(WI(i)).lt.thr)then
+   n_good += 1
+   list_good(n_good) = i
+   eigval(n_good) = WR(i)
+  endif
+ enddo
+ n_real_eigv = n_good 
+ do i = 1, n_good
+  iorder(i) = i
+ enddo
+ ! You sort the real eigenvalues 
+ call dsort(eigval,iorder,n_good)
+ do i = 1, n_real_eigv
+  do j = 1, n
+   reigvec(j,i) = VR(j,list_good(iorder(i)))
+   leigvec(j,i) = VL(j,list_good(iorder(i)))
+  enddo
+ enddo
+ allocate(inv_reigvec(n_real_eigv,n_real_eigv))
+! call get_pseudo_inverse(reigvec,n_real_eigv,n_real_eigv,n_real_eigv,inv_reigvec,n_real_eigv,thr)
+! do i = 1, n_real_eigv
+!  do j = 1, n
+!   leigvec(j,i) = inv_reigvec(i,j)
+!  enddo
+! enddo
+ allocate( S(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+   do i = 1,n_real_eigv
+    write(*,'(100(F10.5,X))')S(:,i)
+   enddo
+! call lapack_diag_non_sym(n,S,WR,WI,VL,VR)
+! print*,'Eigenvalues of S'
+! do i = 1, n
+!  print*,WR(i),dabs(WI(i))
+! enddo
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+! call get_inv_half_svd(S, n_real_eigv, inv_reigvec)
+
+  double precision :: accu_d,accu_nd
+  accu_nd = 0.d0
+  accu_d = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+       accu_d += S(j,i) * S(j,i)
+      else
+       accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print*,'accu_nd = ',accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    ! L x R is already bi-orthogonal
+    !print *, ' L & T bi-orthogonality: ok'
+    return
+  else
+   print*,'PB with bi-orthonormality!!'
+   stop
+  endif
+end
+
+subroutine lapack_diag_non_sym_new(n, A, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  !
+  ! You enter with a general non hermitian matrix A(n,n) 
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  character*1                   :: JOBVL,JOBVR,BALANC,SENSE
+  integer                       :: ILO, IHI
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  double precision              :: ABNRM
+  integer,          allocatable :: IWORK(:)
+  double precision, allocatable :: WORK(:), SCALE_array(:), RCONDE(:), RCONDV(:)
+  double precision, allocatable :: Atmp(:,:)
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  JOBVL  = "V" ! computes the left  eigenvectors 
+  JOBVR  = "V" ! computes the right eigenvectors 
+  BALANC = "B" ! Diagonal scaling and Permutation for optimization
+  SENSE  = "B"
+  lda  = n
+  ldvl = n
+  ldvr = n
+  allocate(WORK(1),SCALE_array(n),RCONDE(n),RCONDV(n),IWORK(2*n-2))
+  LWORK = -1 ! to ask for the optimal size of WORK
+  call dgeevx(BALANC,JOBVL,JOBVR,SENSE,&  ! CHARACTERS 
+              n,Atmp,lda,              &  ! MATRIX TO DIAGONALIZE
+              WR,WI,                   &  ! REAL AND IMAGINARY PART OF EIGENVALUES 
+              VL,ldvl,VR,ldvr,         &  ! LEFT AND RIGHT EIGENVECTORS 
+              ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION
+              WORK,LWORK,IWORK,INFO)
+
+  !if(INFO.gt.0)then
+  ! print*,'dgeev failed !!',INFO
+  if( INFO.ne.0 ) then
+    print *, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(int(work(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+  allocate(WORK(LWORK))
+  ! Actual dnon_hrmt_real_diag_newiagonalization 
+  call dgeevx(BALANC,JOBVL,JOBVR,SENSE,&  ! CHARACTERS 
+              n,Atmp,lda,              &  ! MATRIX TO DIAGONALIZE
+              WR,WI,                   &  ! REAL AND IMAGINARY PART OF EIGENVALUES 
+              VL,ldvl,VR,ldvr,         &  ! LEFT AND RIGHT EIGENVECTORS 
+              ILO,IHI,SCALE_array,ABNRM,RCONDE,RCONDV, & ! OUTPUTS OF OPTIMIZATION
+              WORK,LWORK,IWORK,INFO)
+
+  !if(INFO.ne.0)then
+  ! print*,'dgeev failed !!',INFO
+  if( INFO.ne.0 ) then
+    print *, 'dgeevx failed !!', INFO
+    stop
+  endif
+
+  deallocate( Atmp )
+  deallocate( WORK, SCALE_array, RCONDE, RCONDV, IWORK )
+
+end subroutine lapack_diag_non_sym_new
+
+! ---
+
+subroutine lapack_diag_non_sym_right(n, A, WR, WI, VR)
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VR(n,n)
+
+  integer                       :: i, lda, ldvl, ldvr, LWORK, INFO
+  double precision, allocatable :: Atmp(:,:), WORK(:), VL(:,:)
+
+  lda  = n
+  ldvl = 1
+  ldvr = n
+
+  allocate( Atmp(n,n), VL(1,1) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0)then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1) ! this is the optimal size of WORK 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  ! Actual diagonalization 
+  call dgeev('N', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate(Atmp, WORK, VL)
+
+! print *, ' JOBL = F'
+! print *, ' eigenvalues'
+! do i = 1, n
+!   write(*, '(1000(F16.10,X))') WR(i), WI(i)
+! enddo
+! print *, ' right eigenvect' 
+! do i = 1, n
+!   write(*, '(1000(F16.10,X))') VR(:,i)
+! enddo
+
+end subroutine lapack_diag_non_sym_right
+
+! ---
+
+subroutine non_hrmt_real_diag(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j, n_good
+  double precision              :: thr, threshold, accu_d, accu_nd
+  integer,          allocatable :: list_good(:), iorder(:)
+  double precision, allocatable :: Aw(:,:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:), S(:,:), S_inv_half_tmp(:,:)
+
+  print*, ' Computing the left/right eigenvectors with lapack ...'
+
+  ! Eigvalue(n) = WR(n) + i * WI(n)
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  Aw = A
+  call lapack_diag_non_sym(n, Aw, WR, WI, VL, VR)
+
+  ! ---
+  ! You track the real eigenvalues 
+
+  thr = 1d-15
+
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+    else
+      print*, ' Found an imaginary component to eigenvalue'
+      print*, ' Re(i) + Im(i)', WR(i), WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.thr) then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues 
+  call dsort(eigval, iorder, n_good)
+  do i = 1, n_real_eigv
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+! print *, ' ordered eigenvalues'
+! print *, ' right eigenvect' 
+! do i = 1, n
+!   print *, i, eigval(i)
+!   write(*, '(1000(F16.10,X))') reigvec(:,i)
+! enddo
+
+  ! ---
+
+  allocate( S(n_real_eigv,n_real_eigv), S_inv_half_tmp(n_real_eigv,n_real_eigv) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', n_real_eigv, n_real_eigv, n_real_eigv, 1.d0 &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1)  &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n_real_eigv
+    do j = 1, n_real_eigv
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  threshold = 1.d-15
+  if( (accu_nd .gt. threshold) .or. (dabs(accu_d-dble(n_real_eigv)) .gt. threshold) ) then
+
+    print*, ' sum of off-diag S elements = ', accu_nd
+    print*, ' Should be zero '
+    print*, ' sum of     diag S elements = ', accu_d
+    print*, ' Should be ',n
+    print*, ' Not bi-orthonormal !!'
+    print*, ' Notice that if you are interested in ground state it is not a problem :)'
+  endif
+
+end subroutine non_hrmt_real_diag
+
+! ---
+
+subroutine lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  BEGIN_DOC
+  ! You enter with a general non hermitian matrix A(n,n) and another B(n,n)
+  !
+  ! You get out with the real WR and imaginary part WI of the eigenvalues 
+  !
+  ! Eigvalue(n) = (WR(n) + i * WI(n))
+  !
+  ! And the left VL and right VR eigenvectors 
+  !
+  ! VL(i,j) = <i|Psi_left(j)>  :: projection on the basis element |i> on the jth left  eigenvector 
+  !
+  ! VR(i,j) = <i|Psi_right(j)> :: projection on the basis element |i> on the jth right eigenvector 
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  double precision, intent(out) :: WR(n), WI(n), VL(n,n), VR(n,n)
+
+  integer                       :: lda, ldvl, ldvr, LWORK, INFO
+  integer                       :: n_good
+  double precision, allocatable :: WORK(:)
+  double precision, allocatable :: Atmp(:,:)
+
+  lda  = n
+  ldvl = n
+  ldvr = n
+
+  allocate( Atmp(n,n) )
+  Atmp(1:n,1:n) = A(1:n,1:n)
+
+  allocate(WORK(1))
+  LWORK = -1 
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.gt.0) then
+    print*,'dgeev failed !!',INFO
+    stop
+  endif
+
+  LWORK = max(int(WORK(1)), 1) 
+  deallocate(WORK)
+
+  allocate(WORK(LWORK))
+
+  call dgeev('V', 'V', n, Atmp, lda, WR, WI, VL, ldvl, VR, ldvr, WORK, LWORK, INFO)
+  if(INFO.ne.0) then
+    print*,'dgeev failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, Atmp )
+
+end subroutine lapack_diag_general_non_sym
+
+! ---
+
+subroutine non_hrmt_general_real_diag(n, A, B, reigvec, leigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n) and B(n,n) 
+  !
+  ! A reigvec = eigval * B * reigvec
+  !
+  ! (A)^\dagger leigvec = eigval * B * leigvec
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  END_DOC
+
+  implicit none
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n), B(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+
+  integer                       :: i, j
+  integer                       :: n_good
+  integer, allocatable          :: list_good(:), iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:)
+  double precision, allocatable :: Aw(:,:), Bw(:,:)
+
+  print*,'Computing the left/right eigenvectors ...'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n), Bw(n,n))
+  Aw = A
+  Bw = B
+
+  call lapack_diag_general_non_sym(n, A, B, WR, WI, VL, VR)
+
+  ! You track the real eigenvalues 
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)) .lt. 1.d-12) then
+      n_good += 1
+    else
+      print*,'Found an imaginary component to eigenvalue'
+      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+
+  allocate(list_good(n_good), iorder(n_good))
+  n_good = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-12)then
+      n_good += 1
+      list_good(n_good) = i
+      eigval(n_good) = WR(i)
+    endif
+  enddo
+  n_real_eigv = n_good 
+  do i = 1, n_good
+   iorder(i) = i
+  enddo
+
+  ! You sort the real eigenvalues 
+  call dsort(eigval, iorder, n_good)
+  print*,'n_real_eigv = ', n_real_eigv
+  print*,'n           = ', n
+  do i = 1, n_real_eigv
+    print*,i,'eigval(i) = ', eigval(i) 
+    do j = 1, n
+      reigvec(j,i) = VR(j,list_good(iorder(i)))
+      leigvec(j,i) = Vl(j,list_good(iorder(i)))
+    enddo
+  enddo
+
+end subroutine non_hrmt_general_real_diag
+
+! ---
+
+subroutine impose_biorthog_qr(m, n, Vl, Vr)
+
+  implicit none 
+  integer, intent(in)             :: m, n
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision                :: accu_nd, accu_d, thr_nd, thr_d
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), R(:,:), tmp(:,:)
+
+  ! ---
+
+  call check_biorthog_binormalize(m, n, Vl, Vr, .false.)
+  
+  ! ---
+  
+  allocate(S(n,n))
+  call dgemm( 'T', 'N', n, n, m, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+        accu_d += S(j,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  thr_d  = 1d-10
+  thr_nd = 1d-12
+  if((accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(n)) .lt. thr_d)) then
+    print *, ' bi-orthogonal vectors without QR !'
+    deallocate(S)
+    return
+  endif
+
+  ! -------------------------------------------------------------------------------------
+  !                           QR factorization of S: S = Q x R
+
+
+  print *, ' apply QR decomposition ...'
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! save the upper triangular R
+  allocate( R(n,n) )
+  R(:,:) = S(:,:)
+
+  ! get Q
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orhtog left & right vectors:
+  !                                           Vr' = Vr x inv(R) 
+  !                                           Vl' = inv(Q) x Vl =  Q.T   x Vl 
+
+  ! Q.T x Vl, where Q = S
+
+  allocate( tmp(n,m) )
+  call dgemm( 'T', 'T', n, m, n, 1.d0        &
+            , S, size(S, 1), Vl, size(Vl, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  do i = 1, n
+    do j = 1, m
+      Vl(j,i) = tmp(i,j)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  ! ---
+
+  ! inv(R) 
+  !print *, ' inversing upper triangular matrix ...'
+  call dtrtri("U", "N", n, R, n, INFO)
+  if(INFO .ne. 0) then
+    print*,'dtrtri failed !!', INFO
+    stop
+  endif
+  !print *, ' inversing upper triangular matrix OK' 
+
+  do i = 1, n-1
+    do j = i+1, n
+      R(j,i) = 0.d0
+    enddo
+  enddo
+
+  !print *, ' inv(R):'
+  !do i = 1, n
+  !  write(*, '(1000(F16.10,X))') R(i,:)
+  !enddo
+
+  ! Vr x inv(R) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), R, size(R, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate( R )
+
+  do i = 1, n
+    do j = 1, m
+      Vr(j,i) = tmp(j,i)
+    enddo
+  enddo
+  deallocate(tmp)
+
+  return
+end subroutine impose_biorthog_qr
+
+! ---
+
+subroutine impose_biorthog_lu(m, n, Vl, Vr, S)
+
+  implicit none 
+  integer, intent(in)             :: m, n
+  double precision, intent(inout) :: Vl(m,n), Vr(m,n), S(n,n)
+
+  integer                         :: i, j
+  integer                         :: INFO
+  double precision                :: nrm
+  integer,          allocatable   :: IPIV(:)
+  double precision, allocatable   :: L(:,:), tmp(:,:), vectmp(:)
+  !double precision, allocatable   :: T(:,:), ll(:,:), rr(:,:), tt(:,:)
+
+  !allocate( T(n,n) )
+  !T(:,:) = S(:,:)
+
+  print *, ' apply LU decomposition ...'
+
+  ! -------------------------------------------------------------------------------------
+  !                           LU factorization of S: S = P x L x U
+
+  allocate( IPIV(n) )
+
+  call dgetrf(n, n, S, n, IPIV, INFO)
+  if(INFO .ne. 0) then
+    print*, 'dgetrf failed !!', INFO
+    stop
+  endif
+
+  ! check | S - P x L x U |
+  !allocate( ll(n,n), rr(n,n), tmp(n,n) )
+  !ll = S
+  !rr = S
+  !do i = 1, n-1
+  !  ll(i,i) = 1.d0
+  !  do j = i+1, n
+  !    ll(i,j) = 0.d0
+  !    rr(j,i) = 0.d0
+  !  enddo
+  !enddo
+  !ll(n,n) = 1.d0
+  !call dgemm( 'N', 'N', n, n, n, 1.d0          &
+  !          , ll, size(ll, 1), rr, size(rr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  ! deallocate(ll, rr)
+  !allocate( vectmp(n) )
+  !do j = n-1, 1, -1
+  !  i = IPIV(j)
+  !  if(i.ne.j) then
+  !    print *, j, i
+  !    vectmp(:) = tmp(i,:)
+  !    tmp(i,:)  = tmp(j,:)
+  !    tmp(j,:)  = vectmp(:)
+  !  endif
+  !enddo
+  !deallocate( vectmp )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i) - T(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R - S| =', nrm
+  !stop
+
+  ! ------
+  ! inv(L) 
+  ! ------
+
+  allocate( L(n,n) )
+  L(:,:) = S(:,:)
+
+  call dtrtri("L", "U", n, L, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+  do i = 1, n-1
+    L(i,i) = 1.d0
+    do j = i+1, n
+      L(i,j) = 0.d0
+    enddo
+  enddo
+  L(n,n) = 1.d0
+
+  ! ------
+  ! inv(U) 
+  ! ------
+  
+  call dtrtri("U", "N", n, S, n, INFO)
+  if(INFO .ne. 0) then
+    print*,  'dtrtri failed !!', INFO
+    stop
+  endif
+
+  do i = 1, n-1
+    do j = i+1, n
+      S(j,i) = 0.d0
+    enddo
+  enddo
+
+  !
+  ! -------------------------------------------------------------------------------------
+
+  ! ---
+
+  ! -------------------------------------------------------------------------------------
+  !                               get bi-orhtog left & right vectors:
+  !                                           Vr' = Vr x inv(U) 
+  !                                           Vl' = inv(L) x inv(P) x Vl
+
+  ! inv(P) x Vl
+  allocate( vectmp(n) )
+  do j = n-1, 1, -1
+    i = IPIV(j)
+    if(i.ne.j) then
+      vectmp(:) = L(:,j)
+      L(:,j)    = L(:,i)
+      L(:,i)    = vectmp(:)
+    endif
+  enddo
+  deallocate( vectmp )
+
+  ! Vl'
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'T', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), L, size(L, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(L)
+
+  Vl = tmp
+  deallocate(tmp)
+
+  ! ---
+
+  ! Vr x inv(U) 
+  allocate( tmp(m,n) )
+  call dgemm( 'N', 'N', m, n, n, 1.d0        &
+            , Vr, size(Vr, 1), S, size(S, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  Vr = tmp
+  deallocate(tmp)
+
+  !allocate( tmp(n,n) )
+  !call dgemm( 'T', 'N', n, n, m, 1.d0          &
+  !          , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+  !          , 0.d0, tmp, size(tmp, 1) )
+  !nrm = 0.d0
+  !do i = 1, n
+  !  do j = 1, n
+  !    nrm += dabs(tmp(j,i))
+  !  enddo
+  !enddo
+  !deallocate( tmp )
+  !print*, '|L.T x R| =', nrm
+  !stop
+
+  return
+end subroutine impose_biorthog_lu
+
+! ---
+
+subroutine check_EIGVEC(n, m, A, eigval, leigvec, reigvec, thr_diag, thr_norm, stop_ifnot)
+
+  implicit none
+  integer,          intent(in)  :: n, m
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(in)  :: A(n,n), eigval(m), leigvec(n,m), reigvec(n,m), thr_diag, thr_norm
+ 
+  integer                       :: i, j
+  double precision              :: tmp, tmp_abs, tmp_nrm, tmp_rel, tmp_dif
+  double precision              :: V_nrm, U_nrm
+  double precision, allocatable :: Mtmp(:,:)
+
+  allocate( Mtmp(n,m) )
+  
+  ! ---
+
+  Mtmp = 0.d0
+  call dgemm( 'N', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+    
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * reigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + reigvec(i,j) * reigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    print *, j, tmp, U_nrm
+
+  enddo
+
+  tmp_rel = tmp_abs / tmp_nrm
+  tmp_dif = dabs(V_nrm - dble(m))
+
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in right-eigenvectors'
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CR norm   = ', V_nrm 
+    stop
+  endif
+
+  ! ---
+
+  Mtmp = 0.d0
+  call dgemm( 'T', 'N', n, m, n, 1.d0                  &
+            , A, size(A, 1), leigvec, size(leigvec, 1) &
+            , 0.d0, Mtmp, size(Mtmp, 1) )
+
+  V_nrm   = 0.d0
+  tmp_nrm = 0.d0
+  tmp_abs = 0.d0
+  do j = 1, m
+
+    tmp   = 0.d0
+    U_nrm = 0.d0
+    do i = 1, n
+      tmp     = tmp     + dabs(Mtmp(i,j) - eigval(j) * leigvec(i,j))
+      tmp_nrm = tmp_nrm + dabs(Mtmp(i,j))
+      U_nrm   = U_nrm   + leigvec(i,j) * leigvec(i,j)
+    enddo
+
+    tmp_abs = tmp_abs + tmp
+    V_nrm   = V_nrm   + U_nrm 
+    print *, j, tmp, U_nrm
+
+  enddo
+
+  tmp_rel = tmp_abs / tmp_nrm
+  if( stop_ifnot .and. ((tmp_rel .gt. thr_diag) .or. (tmp_dif .gt. thr_norm)) ) then
+    print *, ' error in left-eigenvectors'
+    print *, ' err estim = ', tmp_abs, tmp_rel
+    print *, ' CR norm   = ', V_nrm 
+    stop
+  endif
+
+  ! ---
+
+  deallocate( Mtmp )
+
+end subroutine check_EIGVEC
+
+! ---
+
+subroutine check_degen(n, m, eigval, leigvec, reigvec)
+
+  implicit none
+  integer,          intent(in)    :: n, m
+  double precision, intent(in)    :: eigval(m)
+  double precision, intent(inout) :: leigvec(n,m), reigvec(n,m)
+ 
+  integer                         :: i, j
+  double precision                :: ei, ej, de, de_thr, accu_nd
+  double precision, allocatable   :: S(:,:)
+
+  de_thr = 1d-7
+
+  do i = 1, m-1
+    ei = eigval(i)
+
+    do j = i+1, m
+      ej = eigval(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+
+        leigvec(:,i) = 0.d0
+        leigvec(:,j) = 0.d0
+        leigvec(i,i) = 1.d0
+        leigvec(j,j) = 1.d0
+
+        reigvec(:,i) = 0.d0
+        reigvec(:,j) = 0.d0
+        reigvec(i,i) = 1.d0
+        reigvec(j,j) = 1.d0
+
+      endif
+
+    enddo
+  enddo
+
+  ! ---
+
+  allocate( S(m,m) )
+
+  ! S = VL x VR
+  call dgemm( 'T', 'N', m, m, n, 1.d0                              &
+            , leigvec, size(leigvec, 1), reigvec, size(reigvec, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) cycle
+      accu_nd = accu_nd + S(j,i) * S(j,i)
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  deallocate( S )
+
+  print *, ' check_degen: L & T bi-orthogonality: ok'
+  print *, ' accu_nd = ', accu_nd
+
+  if( accu_nd .lt. 1d-8 ) then
+    return
+  else
+    stop
+  endif
+
+end subroutine check_degen
+
+! ---
+
+subroutine impose_weighted_orthog_svd(n, m, W, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m), W(n,n)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  print *, ' apply SVD to orthogonalize vectors'
+
+  ! ---
+
+  ! C.T x W x C
+  allocate(S(m,m))
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , C, size(C, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  print *, ' eigenvec overlap bef SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  ! C.T x W x C
+  allocate(S(m,m))
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0      &
+            , C, size(C, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0          &
+            , tmp, size(tmp, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  print *, ' eigenvec overlap aft SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_weighted_orthog_svd
+
+! ---
+
+subroutine impose_orthog_svd(n, m, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+
+  print *, ' apply SVD to orthogonalize vectors'
+
+  ! ---
+
+  allocate(S(m,m))
+
+  ! S = C.T x C
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap bef SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- C x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , C, size(C, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(U, Vt)
+
+  ! C <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      C(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  allocate(S(m,m))
+
+  ! S = C.T x C
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap aft SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_orthog_svd
+
+! ---
+
+subroutine impose_orthog_GramSchmidt(n, m, C)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: C(n,m)
+
+  integer                         :: i, j, k
+  double precision                :: Ojk, Ojj, fact_ct
+  double precision, allocatable   :: S(:,:)
+
+  print *, ''
+  print *, ' apply Gram-Schmidt to orthogonalize vectors'
+  print *, ''
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap bef Gram-Schmidt: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  ! ---
+
+  do k = 2, m
+    do j = 1, k-1
+
+      Ojk = 0.d0    
+      Ojj = 0.d0    
+      do i = 1, n
+        Ojk = Ojk + C(i,j) * C(i,k)
+        Ojj = Ojj + C(i,j) * C(i,j)
+      enddo
+      fact_ct = Ojk / Ojj
+
+      do i = 1, n
+        C(i,k) = C(i,k) - fact_ct * C(i,j)
+      enddo
+
+    enddo
+  enddo
+
+  do k = 1, m
+    fact_ct = 0.d0    
+    do i = 1, n
+      fact_ct = fact_ct + C(i,k) * C(i,k)
+    enddo
+    fact_ct = dsqrt(fact_ct)
+    do i = 1, n
+      C(i,k) = C(i,k) / fact_ct
+    enddo
+  enddo
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , C, size(C, 1), C, size(C, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' eigenvec overlap aft Gram-Schmidt: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_orthog_GramSchmidt
+
+! ---
+
+subroutine impose_orthog_ones(n, deg_num, C)
+
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  integer,          intent(in)    :: deg_num(n)
+  double precision, intent(inout) :: C(n,n)
+
+  integer                         :: i, j, ii, di, dj
+
+  print *, ''
+  print *, ' orthogonalize vectors by hand'
+  print *, ''
+
+  do i = 1, n-1
+    di = deg_num(i)
+
+    if(di .gt. 1) then
+
+      do ii = 1, di
+        C(:     ,i+ii-1) = 0.d0
+        C(i+ii-1,i+ii-1) = 1.d0
+      enddo
+
+      do j = i+di+1, n
+        dj = deg_num(j)
+        if(dj .eq. di) then
+          do ii = 1, dj
+            C(:,     j+ii-1) = 0.d0
+            C(j+ii-1,j+ii-1) = 1.d0
+          enddo
+        endif
+      enddo
+
+    endif
+  enddo 
+
+end subroutine impose_orthog_ones
+
+! ---
+
+subroutine impose_orthog_degen_eigvec(n, e0, C0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: C0(n,n)
+
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: C(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = 1d-10
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! ---
+
+!  call impose_orthog_ones(n, deg_num, C0)
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+    !if(m.eq.3) then
+  
+      allocate(C(n,m))
+      do j = 1, m
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      ! C <= C U sigma^-0.5
+      call impose_orthog_svd(n, m, C)
+
+      ! ---
+
+      ! C = I
+      !C = 0.d0
+      !do j = 1, m
+      !  C(i+j-1,j) = 1.d0
+      !enddo
+
+      ! ---
+
+!      call impose_orthog_GramSchmidt(n, m, C)
+
+      ! ---
+
+      do j = 1, m
+        C0(1:n,i+j-1) = C(1:n,j)
+      enddo
+      deallocate(C)
+
+    endif
+  enddo
+
+end subroutine impose_orthog_degen_eigvec 
+
+! ---
+
+subroutine get_halfinv_svd(n, S)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(inout) :: S(n,n)
+
+  integer                         :: num_linear_dependencies
+  integer                         :: i, j, k
+  double precision                :: accu_d, accu_nd, thresh
+  double precision, parameter     :: threshold = 1.d-6
+  double precision, allocatable   :: U(:,:), Vt(:,:), D(:)
+  double precision, allocatable   :: S0(:,:), Stmp(:,:), Stmp2(:,:)
+
+  allocate( S0(n,n) )
+  S0(1:n,1:n) = S(1:n,1:n)
+
+  allocate(U(n,n), Vt(n,n), D(n))
+  call svd(S, n, U, n, D, Vt, n, n, n)
+
+  num_linear_dependencies = 0
+  do i = 1, n
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  S(:,:) = 0.d0
+  do k = 1, n
+    if(D(k) /= 0.d0) then
+      do j = 1, n
+        do i = 1, n
+          S(i,j) = S(i,j) + U(i,k) * D(k) * Vt(k,j)
+        enddo
+      enddo
+    endif
+  enddo
+  deallocate(U, D, Vt)
+
+  allocate( Stmp(n,n), Stmp2(n,n) )
+  Stmp  = 0.d0
+  Stmp2 = 0.d0
+  ! S^-1/2 x S
+  call dgemm( 'N', 'N', n, n, n, 1.d0        &
+            , S, size(S, 1), S0, size(S0, 1) &
+            , 0.d0, Stmp, size(Stmp, 1) )
+  ! ( S^-1/2 x S ) x S^-1/2
+  call dgemm( 'N', 'N', n, n, n, 1.d0            &
+            , Stmp, size(Stmp, 1), S, size(S, 1) &
+            , 0.d0, Stmp2, size(Stmp2, 1) )
+
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  thresh  = 1.d-10
+  do i = 1, n
+    do j = 1, n
+      if(i==j) then
+       accu_d += Stmp2(j,i)
+      else 
+       accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then
+    print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd
+    print*, ' after S^-1/2: sum of     diag S elements = ', accu_d
+    do i = 1, n
+      write(*,'(1000(F16.10,X))') Stmp2(i,:)
+    enddo
+    stop
+  endif
+
+  deallocate(S0, Stmp, Stmp2)
+
+end subroutine get_halfinv_svd
+
+! ---
+
+subroutine check_biorthog_binormalize(n, m, Vl, Vr, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)    :: n, m
+  logical,          intent(in)    :: stop_ifnot
+  double precision, intent(inout) :: Vl(n,m), Vr(n,m)
+
+  integer                         :: i, j
+  double precision                :: thr_d, thr_nd
+  double precision                :: accu_d, accu_nd, s_tmp
+  double precision, allocatable   :: S(:,:)
+
+  thr_d  = 1d-6
+  thr_nd = 1d-7
+
+  print *, ' check bi-orthonormality'
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix before:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  ! S(i,i) = -1
+  do i = 1, m
+    if( (S(i,i) + 1.d0) .lt. thr_d ) then
+      do j = 1, n
+        Vl(j,i) = -1.d0 * Vl(j,i)
+      enddo
+      S(i,i) = 1.d0
+    endif
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  print*, '    diag acc: ', accu_d
+  print*, ' nondiag acc: ', accu_nd
+
+  ! ---
+
+  if( (accu_nd .lt. thr_nd) .and. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+
+    do i = 1, m
+      print *, i, S(i,i)
+      if(dabs(S(i,i) - 1.d0) .gt. thr_d) then
+        s_tmp = 1.d0 / dsqrt(S(i,i))
+        do j = 1, n
+          Vl(j,i) = Vl(j,i) * s_tmp 
+          Vr(j,i) = Vr(j,i) * s_tmp 
+        enddo
+      endif
+    enddo
+
+  endif
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  !print *, ' overlap matrix after:'
+  !do i = 1, m
+  !  write(*,'(1000(F16.10,X))') S(i,:)
+  !enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + S(i,i)
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+  print *, '    diag acc: ', accu_d
+  print *, ' nondiag acc: ', accu_nd
+
+  deallocate(S)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. (dabs(accu_d-dble(m))/dble(m) .gt. thr_d)) ) then
+    print *, accu_nd, thr_nd 
+    print *, dabs(accu_d-dble(m))/dble(m), thr_d
+    print *, ' biorthog_binormalize failed !'
+    stop
+  endif
+
+end subroutine check_biorthog_binormalize
+
+! ---
+
+subroutine check_weighted_biorthog(n, m, W, Vl, Vr, accu_d, accu_nd, S, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: Vl(n,m), Vr(n,m), W(n,n)
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+  double precision              :: thr_d, thr_nd
+  double precision, allocatable :: SS(:,:), tmp(:,:)
+
+  thr_d  = 1d-6
+  thr_nd = 1d-10
+
+  print *, ' check weighted bi-orthogonality'
+
+  ! ---
+
+  allocate(tmp(m,n))
+  call dgemm( 'T', 'N', m, n, n, 1.d0        &
+            , Vl, size(Vl, 1), W, size(W, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  call dgemm( 'N', 'N', m, m, n, 1.d0            &
+            , tmp, size(tmp, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  deallocate(tmp)
+
+  print *, ' overlap matrix:'
+  do i = 1, m
+    write(*,'(1000(F16.10,X))') S(i,:)
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print *, ' accu_nd = ', accu_nd
+  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+    print *, ' non bi-orthogonal vectors !'
+    print *, ' accu_nd = ', accu_nd
+    print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+    !print *, ' overlap matrix:'
+    !do i = 1, m
+    !  write(*,'(1000(F16.10,X))') S(i,:)
+    !enddo
+    stop
+  endif
+
+end subroutine check_weighted_biorthog
+
+! ---
+
+subroutine check_biorthog(n, m, Vl, Vr, accu_d, accu_nd, S, stop_ifnot)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: Vl(n,m), Vr(n,m)
+  logical,          intent(in)  :: stop_ifnot
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+  double precision              :: thr_d, thr_nd
+  double precision, allocatable :: SS(:,:)
+
+  thr_d  = 1d-6
+  thr_nd = 1d-10
+
+  print *, ' check bi-orthogonality'
+
+  ! ---
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0          &
+            , Vl, size(Vl, 1), Vr, size(Vr, 1) &
+            , 0.d0, S, size(S, 1) )
+  print *, ' overlap matrix:'
+  do i = 1, m
+    write(*,'(1000(F16.10,X))') S(i,:)
+  enddo
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  print *, ' accu_nd = ', accu_nd
+  print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+
+  ! ---
+
+  if( stop_ifnot .and. ((accu_nd .gt. thr_nd) .or. dabs(accu_d-dble(m))/dble(m) .gt. thr_d) ) then
+    print *, ' non bi-orthogonal vectors !'
+    print *, ' accu_nd = ', accu_nd
+    print *, ' accu_d  = ', dabs(accu_d-dble(m))/dble(m)
+    !print *, ' overlap matrix:'
+    !do i = 1, m
+    !  write(*,'(1000(F16.10,X))') S(i,:)
+    !enddo
+    stop
+  endif
+
+end subroutine check_biorthog
+
+! ---
+
+subroutine check_orthog(n, m, V, accu_d, accu_nd, S)
+
+  implicit none
+  
+  integer,          intent(in)  :: n, m
+  double precision, intent(in)  :: V(n,m)
+  double precision, intent(out) :: accu_d, accu_nd, S(m,m)
+
+  integer                       :: i, j
+
+  S = 0.d0
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , V, size(V, 1), V, size(V, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ''
+  print *, ' overlap matrix:'
+  do i = 1, m
+    write(*,'(1000(F16.10,X))') S(i,:)
+  enddo
+  print *, ''
+
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, m
+    do j = 1, m
+      if(i==j) then
+        accu_d = accu_d + dabs(S(i,i))
+      else
+        accu_nd = accu_nd + S(j,i) * S(j,i)
+      endif
+    enddo
+  enddo
+  accu_nd = dsqrt(accu_nd)
+
+  !print*, '    diag acc: ', accu_d
+  !print*, ' nondiag acc: ', accu_nd
+
+end subroutine check_orthog
+
+! ---
+
+subroutine impose_biorthog_degen_eigvec(n, e0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:), S_inv_half(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = 1d-10
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+
+      !allocate(S(m,m))
+      !call dgemm( 'T', 'N', m, m, n, 1.d0      &
+      !          , L, size(L, 1), R, size(R, 1) &
+      !          , 0.d0, S, size(S, 1) )
+      !allocate(S_inv_half(m,m))
+      !call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      !if(complex_root) then
+      !  print*, ' complex roots in inv_half !!! '
+      !  stop
+      !endif
+      !call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      !deallocate(S, S_inv_half)
+
+      call impose_biorthog_svd(n, m, L, R)
+
+      !call impose_biorthog_qr(n, m, L, R)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_orthog_biorthog_degen_eigvec(n, e0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  double precision                :: accu_d, accu_nd
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), S(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = 1d-10
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+  
+      call impose_biorthog_qr(n, m, L, R)
+
+      allocate(S(m,m))
+      call check_biorthog(n, m, L, R, accu_d, accu_nd, S, .true.)
+      !call check_biorthog(n, m, L, L, accu_d, accu_nd, S, .true.)
+      !call check_biorthog(n, m, R, R, accu_d, accu_nd, S, .false.)
+      deallocate(S)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R)
+
+    endif
+  enddo
+
+end subroutine impose_orthog_biorthog_degen_eigvec 
+
+! ---
+
+subroutine impose_unique_biorthog_degen_eigvec(n, e0, C0, W0, L0, R0)
+
+  implicit none
+
+  integer,          intent(in)    :: n
+  double precision, intent(in)    :: e0(n), W0(n,n), C0(n,n)
+  double precision, intent(inout) :: L0(n,n), R0(n,n)
+
+  logical                         :: complex_root
+  integer                         :: i, j, k, m
+  double precision                :: ei, ej, de, de_thr
+  integer,          allocatable   :: deg_num(:)
+  double precision, allocatable   :: L(:,:), R(:,:), C(:,:)
+  double precision, allocatable   :: S(:,:), S_inv_half(:,:), tmp(:,:)
+
+  ! ---
+
+  allocate( deg_num(n) )
+  do i = 1, n
+    deg_num(i) = 1
+  enddo
+
+  de_thr = 1d-10
+
+  do i = 1, n-1
+    ei = e0(i)
+
+    ! already considered in degen vectors
+    if(deg_num(i).eq.0) cycle
+
+    do j = i+1, n
+      ej = e0(j)
+      de = dabs(ei - ej)
+
+      if(de .lt. de_thr) then
+        deg_num(i) = deg_num(i) + 1 
+        deg_num(j) = 0
+      endif
+
+    enddo
+  enddo
+  
+  do i = 1, n
+    if(deg_num(i).gt.1) then
+      print *, ' degen on', i, deg_num(i)
+    endif
+  enddo
+
+  ! ---
+
+  do i = 1, n
+    m = deg_num(i)
+
+    if(m .gt. 1) then
+  
+      allocate(L(n,m))
+      allocate(R(n,m))
+      allocate(C(n,m))
+
+      do j = 1, m
+        L(1:n,j) = L0(1:n,i+j-1)
+        R(1:n,j) = R0(1:n,i+j-1)
+        C(1:n,j) = C0(1:n,i+j-1)
+      enddo
+
+      ! ---
+
+      call impose_orthog_svd(n, m, L)
+      call impose_orthog_svd(n, m, R)
+
+      ! ---
+
+
+      ! TODO:
+      ! select C correctly via overlap
+      ! or via selecting degen in HF
+
+      !call max_overlap_qr(n, m, C, L)
+      !call max_overlap_qr(n, m, C, R)
+
+
+      allocate(tmp(m,n))
+      allocate(S(m,m))
+
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , L, size(L, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), C, size(C, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, L)
+      !call max_overlap_invprod(n, m, S, L)
+
+      call dgemm( 'T', 'N', m, n, n, 1.d0        &
+                , C, size(C, 1), W0, size(W0, 1) &
+                , 0.d0, tmp, size(tmp, 1) )
+      call dgemm( 'N', 'N', m, m, n, 1.d0          &
+                , tmp, size(tmp, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+
+      call max_overlap_qr(n, m, S, R)
+      !call max_overlap_invprod(n, m, S, R)
+
+      deallocate(S, tmp)
+
+      ! ---
+  
+      allocate(S(m,m), S_inv_half(m,m))
+      call dgemm( 'T', 'N', m, m, n, 1.d0      &
+                , L, size(L, 1), R, size(R, 1) &
+                , 0.d0, S, size(S, 1) )
+      call get_inv_half_nonsymmat_diago(S, m, S_inv_half, complex_root)
+      if(complex_root)then
+        call impose_biorthog_svd(n, m, L, R)
+        !call impose_biorthog_qr(n, m, L, R)
+      else
+        call bi_ortho_s_inv_half(m, L, R, S_inv_half)
+      endif
+      deallocate(S, S_inv_half)
+
+      ! ---
+
+      do j = 1, m
+        L0(1:n,i+j-1) = L(1:n,j)
+        R0(1:n,i+j-1) = R(1:n,j)
+      enddo
+
+      deallocate(L, R, C)
+
+    endif
+  enddo
+
+end subroutine impose_unique_biorthog_degen_eigvec 
+
+! ---
+
+subroutine max_overlap_qr(m, n, S0, V)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: S0(n,n)
+  double precision, intent(inout) :: V(m,n)
+
+  integer                         :: i, j
+  integer                         :: LWORK, INFO
+  double precision, allocatable   :: TAU(:), WORK(:)
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+
+  allocate(S(n,n))
+  S = S0
+
+  ! ---
+
+  allocate( TAU(n), WORK(1) )
+
+  LWORK = -1
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dgeqrf(n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dgeqrf failed !!', INFO
+    stop
+  endif
+
+  ! get Q in S matrix
+  LWORK = -1
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  LWORK = max(n, int(WORK(1)))
+  deallocate(WORK)
+
+  allocate( WORK(LWORK) )
+  call dorgqr(n, n, n, S, n, TAU, WORK, LWORK, INFO)
+  if(INFO .ne. 0) then
+    print*,'dorgqr failed !!', INFO
+    stop
+  endif
+
+  deallocate( WORK, TAU )
+
+  ! ---
+
+  ! V0.T <-- Q.T x V0.T, where Q = S
+
+  allocate( tmp(n,m) )
+
+  call dgemm( 'T', 'T', n, m, n, 1.d0      &
+            , S, size(S, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+
+  deallocate(S)
+
+  do i = 1, n
+    do j = 1, m
+      V(j,i) = tmp(i,j)
+    enddo
+  enddo
+
+  deallocate(tmp)
+
+  ! ---
+
+  return
+end subroutine max_overlap_qr
+
+! ---
+
+subroutine max_overlap_invprod(n, m, S, V)
+
+  implicit none 
+  integer,          intent(in)    :: m, n
+  double precision, intent(in)    :: S(m,m)
+  double precision, intent(inout) :: V(n,m)
+
+  integer                         :: i
+  double precision, allocatable   :: invS(:,:), tmp(:,:)
+
+  allocate(invS(m,m))
+  call get_inverse(S, size(S, 1), m, invS, size(invS, 1))
+  print *, ' overlap '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+  print *, ' inv overlap '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') invS(i,:)
+  enddo
+
+  allocate(tmp(n,m))
+  tmp = V
+
+  call dgemm( 'N', 'N', n, m, m, 1.d0        &
+            , tmp, size(tmp, 1), invS, size(invS, 1) &
+            , 0.d0, V, size(V, 1) )
+
+  deallocate(tmp, invS)
+
+  return
+end subroutine max_overlap_invprod
+
+! ---
+
+subroutine impose_biorthog_svd(n, m, L, R)
+
+  implicit none
+
+  integer,          intent(in)    :: n, m
+  double precision, intent(inout) :: L(n,m), R(n,m)
+
+  integer                         :: i, j, num_linear_dependencies
+  double precision                :: threshold
+  double precision, allocatable   :: S(:,:), tmp(:,:)
+  double precision, allocatable   :: U(:,:), V(:,:), Vt(:,:), D(:)
+
+  ! ---
+
+  allocate(S(m,m))
+
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' overlap bef SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+
+  ! ---
+ 
+  allocate(U(m,m), Vt(m,m), D(m))
+
+  call svd(S, m, U, m, D, Vt, m, m, m)
+
+  deallocate(S)
+
+  threshold               = 1.d-6
+  num_linear_dependencies = 0
+  do i = 1, m
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  if(num_linear_dependencies > 0) then
+    write(*,*) ' linear dependencies = ', num_linear_dependencies
+    write(*,*) ' m                   = ', m
+    stop
+  endif
+
+  allocate(V(m,m))
+  do i = 1, m
+    do j = 1, m
+      V(j,i) = Vt(i,j)
+    enddo
+  enddo
+  deallocate(Vt)
+
+  ! ---
+
+  allocate(tmp(n,m))
+
+  ! tmp <-- R x V
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , R, size(R, 1), V, size(V, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(V)
+  ! R <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      R(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  ! tmp <-- L x U
+  call dgemm( 'N', 'N', n, m, m, 1.d0      &
+            , L, size(L, 1), U, size(U, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+  deallocate(U)
+  ! L <-- tmp x sigma^-0.5
+  do j = 1, m
+    do i = 1, n
+      L(i,j) = tmp(i,j) * D(j)
+    enddo
+  enddo
+
+  deallocate(D, tmp)
+
+  ! ---
+
+  allocate(S(m,m))
+  call dgemm( 'T', 'N', m, m, n, 1.d0      &
+            , L, size(L, 1), R, size(R, 1) &
+            , 0.d0, S, size(S, 1) )
+
+  print *, ' overlap aft SVD: '
+  do i = 1, m
+    write(*, '(1000(F16.10,X))') S(i,:)
+  enddo
+  deallocate(S)
+
+  ! ---
+
+end subroutine impose_biorthog_svd
+
+! ---
+
diff --git a/src/non_hermit_dav/new_routines.irp.f b/src/non_hermit_dav/new_routines.irp.f
new file mode 100644
index 00000000..07ac5917
--- /dev/null
+++ b/src/non_hermit_dav/new_routines.irp.f
@@ -0,0 +1,669 @@
+subroutine non_hrmt_diag_split_degen_bi_orthog(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut, thr_norm=1d0
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' orthog between degen eigenvect' 
+    print *, ' '
+    double precision, allocatable :: S_nh_inv_half(:,:)
+    allocate(S_nh_inv_half(n,n))
+    logical :: complex_root
+    deallocate(S_nh_inv_half)
+    call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp)
+    call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+    if( accu_nd .lt. 1d-10 ) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S)
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+     if( accu_nd .lt. 1d-10 ) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ',accu_nd
+      print*,'Must be a deep problem ...'
+      stop
+     endif
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+
+subroutine non_hrmt_diag_split_degen_s_inv_half(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine which returns the sorted REAL EIGENVALUES ONLY and corresponding LEFT/RIGHT eigenvetors 
+  !
+  ! of a non hermitian matrix A(n,n)
+  !
+  ! n_real_eigv is the number of real eigenvalues, which might be smaller than the dimension "n" 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut, thr_norm=1.d0
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    if(complex_root)then 
+     print *, ' '
+     print *, ' '
+     print *, ' orthog between degen eigenvect' 
+     print *, ' '
+     ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
+     call impose_orthog_degen_eigvec(n, eigval, reigvec_tmp) ! orthogonalization of reigvec
+     call impose_orthog_degen_eigvec(n, eigval, leigvec_tmp) ! orthogonalization of leigvec
+     call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S) 
+
+     if( accu_nd .lt. 1d-10 ) then
+       print *, ' bi-orthogonality: ok'
+     else 
+      print*,'New vectors not bi-orthonormals at ', accu_nd
+      call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half, complex_root)
+      if(complex_root)then 
+       call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp) ! bi-orthonormalization using QR
+      else
+       print*,'S^{-1/2} exists !!'
+       call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+      endif
+     endif
+    else ! the matrix S^{-1/2} exists
+     print*,'S^{-1/2} exists !!'
+     call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S, .false.)
+    if( accu_nd .lt. 1d-10 ) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec, shift_current, thr_norm, .false.)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S, .false.)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
+subroutine non_hrmt_fock_mat(n, A, leigvec, reigvec, n_real_eigv, eigval)
+
+  BEGIN_DOC
+  !
+  ! routine returning the eigenvalues and left/right eigenvectors of the TC fock matrix 
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: A(n,n)
+  integer,          intent(out) :: n_real_eigv
+  double precision, intent(out) :: reigvec(n,n), leigvec(n,n), eigval(n)
+  double precision, allocatable :: reigvec_tmp(:,:), leigvec_tmp(:,:)
+
+  integer                       :: i, j, n_degen,k , iteration
+  double precision              :: shift_current
+  double precision              :: r,thr,accu_d, accu_nd
+  integer,          allocatable :: iorder_origin(:),iorder(:)
+  double precision, allocatable :: WR(:), WI(:), Vl(:,:), VR(:,:),S(:,:)
+  double precision, allocatable :: Aw(:,:),diag_elem(:),A_save(:,:)
+  double precision, allocatable :: im_part(:),re_part(:)
+  double precision :: accu,thr_cut
+  double precision, allocatable :: S_nh_inv_half(:,:)
+  logical :: complex_root
+
+
+  thr_cut = 1.d-15
+  print*,'Computing the left/right eigenvectors ...'
+  print*,'Using the degeneracy splitting algorithm'
+ ! initialization
+  shift_current = 1.d-15
+  iteration = 0 
+  print*,'***** iteration = ',iteration
+
+
+  ! pre-processing the matrix :: sorting by diagonal elements
+  allocate(reigvec_tmp(n,n), leigvec_tmp(n,n))
+  allocate(diag_elem(n),iorder_origin(n),A_save(n,n))
+!  print*,'Aw'
+  do i = 1, n
+   iorder_origin(i) = i
+   diag_elem(i) = A(i,i)
+!   write(*,'(100(F16.10,X))')A(:,i)
+  enddo
+  call dsort(diag_elem, iorder_origin, n)
+  do i = 1, n
+   do j = 1, n
+    A_save(j,i) = A(iorder_origin(j),iorder_origin(i))
+   enddo
+  enddo
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n), Aw(n,n))
+  allocate(im_part(n),iorder(n))
+  allocate( S(n,n) )
+  allocate(S_nh_inv_half(n,n))
+
+
+  Aw = A_save
+  call cancel_small_elmts(aw,n,thr_cut)
+  call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+  do i = 1, n
+   im_part(i) = -dabs(WI(i))
+   iorder(i) = i
+  enddo
+  call dsort(im_part, iorder, n)
+  n_real_eigv = 0
+  do i = 1, n
+    if(dabs(WI(i)).lt.1.d-20)then
+      n_real_eigv += 1
+    else
+!      print*,'Found an imaginary component to eigenvalue'
+!      print*,'Re(i) + Im(i)',WR(i),WI(i)
+    endif
+  enddo
+  if(n_real_eigv.ne.n)then
+   shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+   print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+   print*,'Splitting the degeneracies by ',shift_current
+  else
+   print*,'All eigenvalues are real !'
+  endif
+
+
+  do while(n_real_eigv.ne.n)
+   iteration += 1
+   print*,'***** iteration = ',iteration
+   if(shift_current.gt.1.d-3)then
+    print*,'shift_current > 1.d-3 !!'
+    print*,'Your matrix intrinsically contains complex eigenvalues'
+    stop
+   endif
+   Aw = A_save
+!   thr_cut = shift_current
+   call cancel_small_elmts(Aw,n,thr_cut)
+   call split_matrix_degen(Aw,n,shift_current)
+   call lapack_diag_non_sym(n,Aw,WR,WI,VL,VR)
+   n_real_eigv = 0
+   do i = 1, n
+     if(dabs(WI(i)).lt.1.d-20)then
+       n_real_eigv+= 1
+     else
+!       print*,'Found an imaginary component to eigenvalue'
+!       print*,'Re(i) + Im(i)',WR(i),WI(i)
+     endif
+   enddo
+   if(n_real_eigv.ne.n)then
+    do i = 1, n
+     im_part(i) = -dabs(WI(i))
+     iorder(i) = i
+    enddo
+    call dsort(im_part, iorder, n)
+    shift_current = max(10.d0 * dabs(im_part(1)),shift_current*10.d0)
+    print*,'Largest imaginary part found in eigenvalues = ',im_part(1)
+    print*,'Splitting the degeneracies by ',shift_current
+   else
+    print*,'All eigenvalues are real !'
+   endif
+  enddo
+  !!!!!!!!!!!!!!!! SORTING THE EIGENVALUES 
+  do i = 1, n
+   eigval(i) = WR(i)
+   iorder(i) = i
+  enddo
+  call dsort(eigval,iorder,n)
+  do i = 1, n
+!   print*,'eigval(i) = ',eigval(i)
+   reigvec_tmp(:,i) = VR(:,iorder(i))
+   leigvec_tmp(:,i) = Vl(:,iorder(i))
+  enddo
+
+!!! ONCE ALL EIGENVALUES ARE REAL ::: CHECK BI-ORTHONORMALITY
+  !                               check bi-orthogonality
+  call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else
+    print *, ' '
+    print *, ' bi-orthogonality: not imposed yet'
+    print *, ' '
+    print *, ' '
+    print *, ' Using impose_unique_biorthog_degen_eigvec' 
+    print *, ' '
+    ! bi-orthonormalization using orthogonalization of left, right and then QR between left and right
+    call impose_unique_biorthog_degen_eigvec(n, eigval, mo_coef, leigvec_tmp, reigvec_tmp)
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+    print*,'accu_nd = ',accu_nd
+    if( accu_nd .lt. 1d-10 ) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     call get_inv_half_nonsymmat_diago(S, n, S_nh_inv_half,complex_root)
+     if(complex_root)then 
+      print*,'S^{-1/2} does not exits, using QR bi-orthogonalization'
+      call impose_biorthog_qr(n, n, leigvec_tmp, reigvec_tmp, S) ! bi-orthonormalization using QR
+     else
+      print*,'S^{-1/2} exists !!'
+      call bi_ortho_s_inv_half(n,leigvec_tmp,reigvec_tmp,S_nh_inv_half) ! use of S^{-1/2} bi-orthonormalization 
+     endif
+    endif
+    call check_biorthog(n, n, leigvec_tmp, reigvec_tmp, accu_d, accu_nd, S)
+    if( accu_nd .lt. 1d-10 ) then
+      print *, ' bi-orthogonality: ok'
+    else 
+     print*,'New vectors not bi-orthonormals at ',accu_nd
+     print*,'Must be a deep problem ...'
+     stop
+    endif
+  endif
+ 
+  !! EIGENVECTORS SORTED AND BI-ORTHONORMAL
+  do i = 1, n
+   do j = 1, n
+    VR(iorder_origin(j),i) = reigvec_tmp(j,i)
+    VL(iorder_origin(j),i) = leigvec_tmp(j,i)
+   enddo
+  enddo
+
+  !! RECOMPUTING THE EIGENVALUES 
+  eigval = 0.d0
+  do i = 1, n
+   iorder(i) = i
+   accu = 0.d0
+   do j = 1, n
+    accu += VL(j,i) * VR(j,i) 
+    do k = 1, n
+     eigval(i) +=  VL(j,i) * A(j,k) * VR(k,i) 
+    enddo
+   enddo
+   eigval(i) *= 1.d0/accu
+!   print*,'eigval(i) = ',eigval(i)
+  enddo
+  !! RESORT JUST TO BE SURE
+  call dsort(eigval, iorder, n)
+  do i = 1, n
+   do j = 1, n
+    reigvec(j,i) = VR(j,iorder(i))
+    leigvec(j,i) = VL(j,iorder(i))
+   enddo
+  enddo
+  print*,'Checking for final reigvec/leigvec'
+  shift_current = max(1.d-10,shift_current)
+  print*,'Thr for eigenvectors = ',shift_current
+  call check_EIGVEC(n, n, A, eigval, leigvec, reigvec,shift_current)
+  call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+  print *, ' accu_nd bi-orthog = ', accu_nd
+  
+  if( accu_nd .lt. 1d-10 ) then
+    print *, ' bi-orthogonality: ok'
+  else 
+   print*,'Something went wrong in non_hrmt_diag_split_degen_bi_orthog'
+   print*,'Eigenvectors are not bi orthonormal ..'
+   print*,'accu_nd = ',accu_nd
+   stop
+  endif
+
+end 
+
+
diff --git a/src/non_hermit_dav/project.irp.f b/src/non_hermit_dav/project.irp.f
new file mode 100644
index 00000000..c04719ac
--- /dev/null
+++ b/src/non_hermit_dav/project.irp.f
@@ -0,0 +1,53 @@
+subroutine h_non_hermite(v,u,Hmat,a,N_st,sze) 
+ implicit none
+ BEGIN_DOC
+ ! Template of routine for the application of H
+ !
+ ! Here, it is done with the Hamiltonian matrix 
+ !
+ ! on the set of determinants of psi_det 
+ !
+ ! Computes $v = a * H | u \rangle$ 
+ !
+ END_DOC
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat(sze,sze), a
+ double precision, intent(inout)  :: v(sze,N_st)
+ integer :: i,j,k 
+ do k = 1, N_st
+  do j = 1, sze
+   do i = 1, sze
+    v(i,k) += a * u(j,k) * Hmat(i,j)
+   enddo
+  enddo
+ enddo
+end
+
+
+subroutine exp_tau_H(u,v,hmat,tau,et,N_st,sze)
+ implicit none
+ BEGIN_DOC
+! realises v = (1 - tau (H - et)) u
+ END_DOC
+ integer, intent(in) :: N_st,sze
+ double precision, intent(in) :: hmat(sze,sze), u(sze,N_st), tau, et
+ double precision, intent(out):: v(sze,N_st)
+ double precision :: a
+ integer :: i,j
+ v = (1.d0 + tau * et) * u 
+ a = -1.d0 * tau
+ call h_non_hermite(v,u,Hmat,a,N_st,sze)
+end
+
+double precision function project_phi0(u,Hmat0,N_st,sze)
+ implicit none
+ integer, intent(in)              :: N_st,sze
+ double precision, intent(in)     :: u(sze,N_st), Hmat0(sze)
+ integer :: j
+ project_phi0 = 0.d0
+ do j = 1, sze
+  project_phi0 += u(j,1) * Hmat0(j) 
+ enddo
+ project_phi0 *= 1.d0 / u(1,1)
+end
+
diff --git a/src/non_hermit_dav/utils.irp.f b/src/non_hermit_dav/utils.irp.f
new file mode 100644
index 00000000..7f331a6b
--- /dev/null
+++ b/src/non_hermit_dav/utils.irp.f
@@ -0,0 +1,325 @@
+
+subroutine get_inv_half_svd(matrix, n, matrix_inv_half)
+
+  BEGIN_DOC
+  !   :math:`X = S^{-1/2}` obtained by SVD
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: num_linear_dependencies
+  integer                       :: LDA, LDC
+  integer                       :: info, i, j, k
+  double precision, parameter   :: threshold = 1.d-6
+  double precision, allocatable :: U(:,:),Vt(:,:), D(:),matrix_half(:,:),D_half(:)
+
+  double precision :: accu_d,accu_nd
+
+  LDA = size(matrix, 1)
+  LDC = size(matrix_inv_half, 1)
+  if(LDA .ne. LDC) then
+    print*, ' LDA != LDC'
+    stop
+  endif
+
+  print*, ' n   = ', n
+  print*, ' LDA = ', LDA
+  print*, ' LDC = ', LDC
+
+  double precision,allocatable :: WR(:),WI(:),VL(:,:),VR(:,:)
+  allocate(WR(n),WI(n),VL(n,n),VR(n,n))
+  call lapack_diag_non_sym(n,matrix,WR,WI,VL,VR)
+  do i = 1, n
+    print*,'WR,WI',WR(i),WI(i)
+  enddo
+
+
+  allocate(U(LDC,n), Vt(LDA,n), D(n))
+
+  call svd(matrix, LDA, U, LDC, D, Vt, LDA, n, n)
+  double precision, allocatable :: tmp1(:,:),tmp2(:,:),D_mat(:,:)
+  allocate(tmp1(n,n),tmp2(n,n),D_mat(n,n),matrix_half(n,n),D_half(n))
+  D_mat = 0.d0
+  do i = 1,n
+   D_mat(i,i) = D(i)
+  enddo
+  ! matrix = U D Vt
+  ! tmp1 = U D
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , U, size(U, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  ! tmp2 = tmp1 X Vt = matrix
+  tmp2 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , tmp1, size(tmp1, 1), Vt, size(Vt, 1) &
+            , 0.d0, tmp2, size(tmp2, 1) )
+  print*,'Checking the recomposition of the matrix'
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp2(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp2(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+  print*,'passed the recomposition'
+
+  num_linear_dependencies = 0
+  do i = 1, n
+    if(abs(D(i)) <= threshold) then
+      D(i) = 0.d0
+      num_linear_dependencies += 1
+    else
+      ASSERT (D(i) > 0.d0)
+      D_half(i) = dsqrt(D(i))
+      D(i) = 1.d0 / dsqrt(D(i))
+    endif
+  enddo
+  write(*,*) ' linear dependencies', num_linear_dependencies
+
+  matrix_inv_half = 0.d0
+  matrix_half = 0.d0
+  do k = 1, n
+    if(D(k) /= 0.d0) then
+      do j = 1, n
+        do i = 1, n
+!          matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(k,j)
+           matrix_inv_half(i,j) = matrix_inv_half(i,j) + U(i,k) * D(k) * Vt(j,k)
+           matrix_half(i,j) = matrix_half(i,j) + U(i,k) * D_half(k) * Vt(j,k)
+        enddo
+      enddo
+    endif
+  enddo
+  print*,'testing S^1/2 * S^1/2= S'
+  ! tmp1 = S^1/2 X S^1/2
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            , matrix_half, size(matrix_half, 1), matrix_half, size(matrix_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(tmp1(i,i) - matrix(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - matrix(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+!  print*,'S inv half'
+!  do i = 1, n
+!   write(*, '(1000(F16.10,X))') matrix_inv_half(i,:)
+!  enddo
+
+  double precision, allocatable :: pseudo_inverse(:,:),identity(:,:)
+  allocate( pseudo_inverse(n,n),identity(n,n))
+  call get_pseudo_inverse(matrix,n,n,n,pseudo_inverse,n,threshold)
+ 
+  ! S^-1 X S = 1
+!  identity = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+!            , matrix, size(matrix, 1), pseudo_inverse, size(pseudo_inverse, 1) &
+!            , 0.d0, identity, size(identity, 1) )
+  print*,'Checking  S^-1/2 X S^-1/2 = S^-1 ?'
+  ! S^-1/2 X S^-1/2 = S^-1 ?
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+            ,matrix_inv_half, size(matrix_inv_half, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  accu_nd = 0.d0
+  accu_d  = 0.d0
+  do i = 1, n
+   accu_d += dabs(1.d0 - pseudo_inverse(i,i))
+   do j = 1, n
+    if(i==j)cycle 
+    accu_nd += dabs(tmp1(j,i) - pseudo_inverse(j,i))
+   enddo
+  enddo
+  print*,'accu_d  =',accu_d
+  print*,'accu_nd =',accu_nd
+
+  stop
+!
+!  ! ( S^-1/2 x S ) x S^-1/2
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , Stmp, size(Stmp, 1), matrix_inv_half, size(matrix_inv_half, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+
+  ! S^-1/2 x ( S^-1/2 x S )
+!  Stmp2 = 0.d0
+!  call dgemm( 'N', 'N', n, n, n, 1.d0                                        &
+!            , matrix_inv_half, size(matrix_inv_half, 1), Stmp, size(Stmp, 1) &
+!            , 0.d0, Stmp2, size(Stmp2, 1) )
+ 
+!  do i = 1, n
+!    do j = 1, n
+!      if(i==j) then
+!       accu_d += Stmp2(j,i)
+!      else 
+!       accu_nd = accu_nd + Stmp2(j,i) * Stmp2(j,i)
+!      endif
+!    enddo
+!  enddo
+!  accu_nd = dsqrt(accu_nd)
+!  print*, ' after S^-1/2: sum of off-diag S elements = ', accu_nd
+!  print*, ' after S^-1/2: sum of     diag S elements = ', accu_d
+!  do i = 1, n
+!   write(*,'(1000(F16.10,X))') Stmp2(i,:)
+!  enddo
+
+  !double precision  :: thresh
+  !thresh = 1.d-10
+  !if( accu_nd.gt.thresh .or. dabs(accu_d-dble(n)).gt.thresh) then
+  !  stop
+  !endif
+
+end subroutine get_inv_half_svd
+
+! ---
+
+subroutine get_inv_half_nonsymmat_diago(matrix, n, matrix_inv_half, complex_root)
+
+  BEGIN_DOC
+  ! input:  S = matrix
+  ! output: S^{-1/2} = matrix_inv_half obtained by diagonalization
+  !
+  ! S = VR D VL^T
+  !   = VR D^{1/2} D^{1/2} VL^T
+  !   = VR D^{1/2} VL^T VR D^{1/2} VL^T
+  !   = S^{1/2} S^{1/2} with S = VR D^{1/2} VL^T 
+  !
+  ! == > S^{-1/2} = VR D^{-1/2} VL^T
+  !
+  END_DOC
+
+  implicit none
+
+  integer,          intent(in)  :: n
+  double precision, intent(in)  :: matrix(n,n)
+  logical,          intent(out) :: complex_root
+  double precision, intent(out) :: matrix_inv_half(n,n)
+
+  integer                       :: i, j
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: WR(:), WI(:), VL(:,:), VR(:,:), S(:,:), S_diag(:)
+  double precision, allocatable :: tmp1(:,:), D_mat(:,:)
+
+  complex_root = .False.
+
+  matrix_inv_half = 0.D0
+  print*,'Computing S^{-1/2}'
+
+  allocate(WR(n), WI(n), VL(n,n), VR(n,n))
+  call lapack_diag_non_sym(n, matrix, WR, WI, VL, VR)
+
+  allocate(S(n,n))
+  call check_biorthog(n, n, VL, VR, accu_d, accu_nd, S)
+  print*,'accu_nd S^{-1/2}',accu_nd
+  if(accu_nd.gt.1.d-10) then
+    complex_root = .True. ! if vectors are not bi-orthogonal return 
+    print*,'Eigenvectors of S are not bi-orthonormal, skipping S^{-1/2}'
+    return
+  endif
+
+  allocate(S_diag(n))
+  do i = 1, n
+    S_diag(i) = 1.d0/dsqrt(S(i,i))
+    if(dabs(WI(i)).gt.1.d-20.or.WR(i).lt.0.d0)then ! check that eigenvalues are real and positive 
+     complex_root = .True.
+     print*,'Eigenvalues of S have imaginary part '
+     print*,'WR(i),WI(i)',WR(i), WR(i)
+     print*,'Skipping S^{-1/2}'
+     return
+    endif
+  enddo
+  deallocate(S)
+
+  if(complex_root) return
+
+  ! normalization of vectors 
+  do i = 1, n
+    if(S_diag(i).eq.1.d0) cycle
+    do j = 1,n
+      VL(j,i) *= S_diag(i)
+      VR(j,i) *= S_diag(i)
+    enddo
+  enddo
+  deallocate(S_diag)
+
+  allocate(tmp1(n,n), D_mat(n,n))
+
+  D_mat = 0.d0
+  do i = 1, n
+    D_mat(i,i) = 1.d0/dsqrt(WR(i))
+  enddo
+  deallocate(WR, WI)
+
+  ! tmp1 = VR D^{-1/2} 
+  tmp1 = 0.d0
+  call dgemm( 'N', 'N', n, n, n, 1.d0                &
+            , VR, size(VR, 1), D_mat, size(D_mat, 1) &
+            , 0.d0, tmp1, size(tmp1, 1) )
+  deallocate(VR, D_mat)
+
+  ! S^{-1/2} = tmp1 X VL^T 
+  matrix_inv_half = 0.d0
+  call dgemm( 'N', 'T', n, n, n, 1.d0              &
+            , tmp1, size(tmp1, 1), VL, size(VL, 1) &
+            , 0.d0, matrix_inv_half, size(matrix_inv_half, 1) )
+  deallocate(tmp1, VL)
+
+end
+
+! ---
+
+subroutine bi_ortho_s_inv_half(n,leigvec,reigvec,S_nh_inv_half)
+ implicit  none
+ integer, intent(in) :: n
+ double precision, intent(in) :: S_nh_inv_half(n,n)
+ double precision, intent(inout) :: leigvec(n,n),reigvec(n,n)
+ BEGIN_DOC 
+ ! bi-orthonormalization of left and right vectors 
+ ! 
+ ! S = VL^T VR 
+ !
+ ! S^{-1/2} S S^{-1/2} = 1 = S^{-1/2} VL^T VR S^{-1/2} = VL_new^T VR_new
+ !
+ ! VL_new = VL (S^{-1/2})^T
+ ! 
+ ! VR_new = VR S^{^{-1/2}}
+ END_DOC
+ double precision,allocatable :: vl_tmp(:,:),vr_tmp(:,:)
+ print*,'Bi-orthonormalization using S^{-1/2}'
+ allocate(vl_tmp(n,n),vr_tmp(n,n))
+ vl_tmp = leigvec
+ vr_tmp = reigvec
+ ! VL_new = VL (S^{-1/2})^T
+ call dgemm( 'N', 'T', n, n, n, 1.d0                                            &
+             , vl_tmp, size(vl_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, leigvec, size(leigvec, 1) )
+ ! VR_new = VR S^{^{-1/2}}
+ call dgemm( 'N', 'N', n, n, n, 1.d0                                            &
+             , vr_tmp, size(vr_tmp, 1), S_nh_inv_half, size(S_nh_inv_half, 1) &
+             , 0.d0, reigvec, size(reigvec, 1) )
+ double precision :: accu_d, accu_nd
+ double precision,allocatable :: S(:,:) 
+ allocate(S(n,n)) 
+ call check_biorthog(n, n, leigvec, reigvec, accu_d, accu_nd, S)
+ if(dabs(accu_d - n).gt.1.d-10 .or. accu_nd .gt.1.d-8 )then
+  print*,'Pb in bi_ortho_s_inv_half !!'
+  print*,'accu_d =',accu_d
+  print*,'accu_nd =',accu_nd
+  stop
+ endif
+end
diff --git a/src/tc_bi_ortho/compute_deltamu_right.irp.f b/src/tc_bi_ortho/compute_deltamu_right.irp.f
index 6464796e..7ca2c890 100644
--- a/src/tc_bi_ortho/compute_deltamu_right.irp.f
+++ b/src/tc_bi_ortho/compute_deltamu_right.irp.f
@@ -42,7 +42,7 @@ subroutine delta_right()
 
   enddo
 
-  call ezfio_set_dmc_dress_dmc_delta_h(delta)
+!  call ezfio_set_dmc_dress_dmc_delta_h(delta)
 
   deallocate(delta)
 
diff --git a/src/tc_bi_ortho/print_tc_wf.irp.f b/src/tc_bi_ortho/print_tc_wf.irp.f
index 58a733a7..e6ea5965 100644
--- a/src/tc_bi_ortho/print_tc_wf.irp.f
+++ b/src/tc_bi_ortho/print_tc_wf.irp.f
@@ -24,7 +24,7 @@ subroutine write_l_r_wf
  output=trim(ezfio_filename)//'.tc_wf'
  i_unit_output = getUnitAndOpen(output,'w')
  integer :: i
- print*,'Writing the left-right wf'
+ print*,'Writing the left-right wf, and usual psi'
  do i = 1, N_det
   write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i)
  enddo
diff --git a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
deleted file mode 100644
index e6bf3d6e..00000000
--- a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
+++ /dev/null
@@ -1,61 +0,0 @@
-program tc_bi_ortho
-  implicit none
-  BEGIN_DOC
-! TODO : Put the documentation of the program here
-  END_DOC
-  print *, 'Hello world'
-  my_grid_becke = .True.
-  my_n_pt_r_grid = 30
-  my_n_pt_a_grid = 50
-  read_wf = .True.
-  touch read_wf
-  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-
-  !!!!!!!!!!!!!!! WARNING NO 3-BODY
-  !!!!!!!!!!!!!!! WARNING NO 3-BODY
-  three_body_h_tc = .False.
-  touch three_body_h_tc 
-  !!!!!!!!!!!!!!! WARNING NO 3-BODY
-  !!!!!!!!!!!!!!! WARNING NO 3-BODY
-
-  call routine_test
-! call test
-end
-
-subroutine routine_test
- implicit none
- use bitmasks ! you need to include the bitmasks_module.f90 features
- integer :: i,n_good,degree
- integer(bit_kind), allocatable :: dets(:,:,:)
- integer, allocatable :: iorder(:)
- double precision, allocatable :: coef(:),coef_new(:,:)
- double precision :: thr
- allocate(coef(N_det), iorder(N_det))
- do i = 1, N_det
-  iorder(i) = i
-  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
-  if(degree==1)then
-   coef(i) = -0.5d0
-  else
-   coef(i) = -dabs(coef_pt1_bi_ortho(i))
-  endif
- enddo
- call dsort(coef,iorder,N_det)
- !thr = save_threshold
- thr = 1d-15
- n_good = 0
- do i = 1, N_det
-  if(dabs(coef(i)).gt.thr)then
-   n_good += 1
-  endif
- enddo
- print*,'n_good = ',n_good
- allocate(dets(N_int,2,n_good),coef_new(n_good,n_states))
- do i = 1, n_good
-  dets(:,:,i) = psi_det(:,:,iorder(i))
-  coef_new(i,:) = psi_coef(iorder(i),:)
- enddo
- call save_wavefunction_general(n_good,n_states,dets,n_good,coef_new)
-
-
-end
diff --git a/src/tc_keywords/EZFIO.cfg b/src/tc_keywords/EZFIO.cfg
index eedede5b..6bf47252 100644
--- a/src/tc_keywords/EZFIO.cfg
+++ b/src/tc_keywords/EZFIO.cfg
@@ -105,3 +105,21 @@ type: integer
 doc: if +1: only positive is selected, -1: only negative is selected, :0 both positive and negative
 interface: ezfio,provider,ocaml
 default: 0
+
+[j1b_pen]
+type: double precision
+doc: exponents of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_coeff]
+type: double precision
+doc: coeff of the 1-body Jastrow
+interface: ezfio
+size: (nuclei.nucl_num)
+
+[j1b_type]
+type: integer
+doc: type of 1-body Jastrow
+interface: ezfio, provider, ocaml
+default: 0
diff --git a/src/tc_keywords/NEED b/src/tc_keywords/NEED
index 5a3182ed..f1c051ff 100644
--- a/src/tc_keywords/NEED
+++ b/src/tc_keywords/NEED
@@ -1 +1,2 @@
 ezfio_files
+nuclei
diff --git a/src/ao_tc_eff_map/j1b_pen.irp.f b/src/tc_keywords/j1b_pen.irp.f
similarity index 88%
rename from src/ao_tc_eff_map/j1b_pen.irp.f
rename to src/tc_keywords/j1b_pen.irp.f
index f829108b..207a9134 100644
--- a/src/ao_tc_eff_map/j1b_pen.irp.f
+++ b/src/tc_keywords/j1b_pen.irp.f
@@ -13,7 +13,7 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ]
   PROVIDE ezfio_filename
 
   if (mpi_master) then
-    call ezfio_has_ao_tc_eff_map_j1b_pen(exists)
+    call ezfio_has_tc_keywords_j1b_pen(exists)
   endif
 
   IRP_IF MPI_DEBUG
@@ -34,7 +34,7 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ]
 
     if (mpi_master) then
       write(6,'(A)') '.. >>>>> [ IO READ: j1b_pen ] <<<<< ..'
-      call ezfio_get_ao_tc_eff_map_j1b_pen(j1b_pen)
+      call ezfio_get_tc_keywords_j1b_pen(j1b_pen)
       IRP_IF MPI
         call MPI_BCAST(j1b_pen, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
         if (ierr /= MPI_SUCCESS) then
@@ -47,10 +47,11 @@ BEGIN_PROVIDER [ double precision, j1b_pen, (nucl_num) ]
  
     integer :: i
     do i = 1, nucl_num
-      j1b_pen(i) = 1d5
+      j1b_pen(i) = 100.d0
     enddo
 
   endif
+  print*,'j1b_pen = ',j1b_pen
 
 END_PROVIDER
 
@@ -68,7 +69,7 @@ BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ]
   PROVIDE ezfio_filename
 
   if (mpi_master) then
-    call ezfio_has_ao_tc_eff_map_j1b_coeff(exists)
+    call ezfio_has_tc_keywords_j1b_coeff(exists)
   endif
 
   IRP_IF MPI_DEBUG
@@ -89,7 +90,7 @@ BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ]
 
     if (mpi_master) then
       write(6,'(A)') '.. >>>>> [ IO READ: j1b_coeff ] <<<<< ..'
-      call ezfio_get_ao_tc_eff_map_j1b_coeff(j1b_coeff)
+      call ezfio_get_tc_keywords_j1b_coeff(j1b_coeff)
       IRP_IF MPI
         call MPI_BCAST(j1b_coeff, (nucl_num), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
         if (ierr /= MPI_SUCCESS) then
@@ -110,3 +111,4 @@ BEGIN_PROVIDER [ double precision, j1b_coeff, (nucl_num) ]
 END_PROVIDER
 
 ! ---
+
diff --git a/src/tc_keywords/tc_keywords.irp.f b/src/tc_keywords/tc_keywords.irp.f
index 3bc68550..d8b862f5 100644
--- a/src/tc_keywords/tc_keywords.irp.f
+++ b/src/tc_keywords/tc_keywords.irp.f
@@ -4,4 +4,5 @@ program tc_keywords
 ! TODO : Put the documentation of the program here
   END_DOC
   print *, 'Hello world'
+ provide j1b_pen
 end