Merge pull request #245 from QuantumPackage/dev-stable-tc-scf

Dev stable tc scf
2024-07-03 01:46:05 +02:00 · 2023-02-08 16:33:30 +01:00 · 2023-02-08 16:33:30 +01:00 · 81ca29427a
commit 81ca29427a
parent 669e91c0da bf7d63e1e9
120 changed files with 17040 additions and 3034 deletions
--- a/src/cipsi_tc_bi_ortho/EZFIO.cfg
+++ b/src/cipsi_tc_bi_ortho/EZFIO.cfg
@ -0,0 +1,36 @@
+[save_wf_after_selection]
+type: logical
+doc: If true, saves the wave function after the selection, before the diagonalization
+interface: ezfio,provider,ocaml
+default: False
+
+[seniority_max]
+type: integer
+doc: Maximum number of allowed open shells. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_ref]
+type: integer
+doc: 1: Hartree-Fock determinant, 2: All determinants of the dominant configuration
+interface: ezfio,ocaml,provider
+default: 1
+
+[excitation_max]
+type: integer
+doc: Maximum number of excitations with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_alpha_max]
+type: integer
+doc: Maximum number of excitations for alpha determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
+[excitation_beta_max]
+type: integer
+doc: Maximum number of excitations for beta determinants with respect to the Hartree-Fock determinant. Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
--- a/src/cipsi_tc_bi_ortho/NEED
+++ b/src/cipsi_tc_bi_ortho/NEED
@ -0,0 +1,6 @@
+mpi
+perturbation
+zmq
+iterations_tc
+csf
+tc_bi_ortho
--- a/src/cipsi_tc_bi_ortho/cipsi.irp.f
+++ b/src/cipsi_tc_bi_ortho/cipsi.irp.f
@ -0,0 +1,136 @@
+subroutine run_cipsi
+
+  BEGIN_DOC
+  ! Selected Full Configuration Interaction with deterministic selection and
+  ! stochastic PT2, diagonalizing with diagonalize_CI_tc_bi_ortho.
+  END_DOC
+  ! Loop: select, copy_H_apply_buffer_to_wf, diagonalize; runs while N_det < N_det_max and max|pt2| > pt2_max.
+  use selection_types
+
+  implicit none
+
+  integer                        :: i,j,k,ndet
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select   ! number of determinants requested from one selection step
+  logical, external :: qp_stop
+
+  double precision :: threshold_generators_save   ! value put back after ZMQ_pt2 in the loop
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+
+  PROVIDE H_apply_buffer_allocated distributed_davidson
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  ! Reset the iteration counter and set threshold_generators to 1.d0 for the first pass.
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+  ! Memory request handed to check_mem: 4 x memory_of_double(N_states).
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  double precision               :: hf_energy_ref   ! NOTE(review): set below but not read again in this routine
+  logical                        :: has, print_pt2
+  double precision               :: relative_error
+
+  relative_error=PT2_relative_error
+  ! Sentinels: |pt2| starts huge, so the pt2_max test of the loop holds unless pt2_data is updated first.
+  zeros = 0.d0
+  pt2_data % pt2  = -huge(1.e0)
+  pt2_data % rpt2 = -huge(1.e0)
+  pt2_data % overlap(:,:) = 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  ! Reference energy: value stored in EZFIO when present, ref_bitmask_energy otherwise.
+  call ezfio_has_hartree_fock_energy(has)
+  if (has) then
+    call ezfio_get_hartree_fock_energy(hf_energy_ref)
+  else
+    hf_energy_ref = ref_bitmask_energy
+  endif
+  ! Starting wave function larger than N_det_max: keep the first N_det_max sorted determinants, re-diagonalize.
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+  endif
+
+  correlation_energy_ratio = 0.d0   ! NOTE(review): assigned but never read in this routine
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+    ! Requested number of new determinants: sqrt(N_states)*N_det*selection_factor, at least N_states_diag.
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function
+    if (do_pt2) then
+      call pt2_dealloc(pt2_data)
+      call pt2_dealloc(pt2_data_err)
+      call pt2_alloc(pt2_data, N_states)
+      call pt2_alloc(pt2_data_err, N_states)
+      threshold_generators_save = threshold_generators   ! threshold forced to 1.d0 only for the ZMQ_pt2 call
+      threshold_generators = 1.d0
+      SOFT_TOUCH threshold_generators
+      call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+      threshold_generators = threshold_generators_save
+      SOFT_TOUCH threshold_generators
+    else
+      call pt2_dealloc(pt2_data)
+      call pt2_alloc(pt2_data, N_states)
+      call ZMQ_selection(to_select, pt2_data)
+    endif
+
+    N_iter += 1
+
+    if (qp_stop()) exit   ! leave before the selected determinants are added
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf()
+
+    if (save_wf_after_selection) then
+      call save_wavefunction
+    endif
+
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+    if (qp_stop()) exit
+  enddo
+  ! Final pass: fresh pt2 buffers, ZMQ_pt2 called with 0 in the to_select position, then one more diagonalization.
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+end
--- a/src/cipsi_tc_bi_ortho/energy.irp.f
+++ b/src/cipsi_tc_bi_ortho/energy.irp.f
@ -0,0 +1,51 @@
+BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ]
+  implicit none
+  BEGIN_DOC
+  ! Flag requesting the initialization of pt2_E0_denominator. Always true here.
+  END_DOC
+  initialize_pt2_E0_denominator = .true.
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! E0 in the denominator of the PT2. Currently always set to eigval_right_tc_bi_orth.
+ END_DOC
+ integer :: i,j   ! only referenced by the disabled block below
+
+  pt2_E0_denominator = eigval_right_tc_bi_orth
+! NOTE: the h0_type / initialize_pt2_E0_denominator dependent choice below is disabled (kept for reference).
+! if (initialize_pt2_E0_denominator) then
+!   if (h0_type == "EN") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else if (h0_type == "HF") then
+!     do i=1,N_states
+!       j = maxloc(abs(psi_coef(:,i)),1)
+!       pt2_E0_denominator(i) = psi_det_hii(j)
+!     enddo
+!   else if (h0_type == "Barycentric") then
+!     pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
+!   else if (h0_type == "CFG") then
+!     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+!   else
+!     print *,  h0_type, ' not implemented'
+!     stop
+!   endif
+!  do i=1,N_states
+!    call write_double(6,pt2_E0_denominator(i)+nuclear_repulsion, 'PT2 Energy denominator')
+!  enddo
+! else
+!   pt2_E0_denominator = -huge(1.d0)
+! endif
+
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, pt2_overlap, (N_states, N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Overlap between the perturbed wave functions. Provided as an all-zero matrix.
+  END_DOC
+  pt2_overlap = 0.d0
+END_PROVIDER
+
--- a/src/cipsi_tc_bi_ortho/environment.irp.f
+++ b/src/cipsi_tc_bi_ortho/environment.irp.f
@ -0,0 +1,14 @@
+BEGIN_PROVIDER [ integer, nthreads_pt2 ]
+ implicit none
+ BEGIN_DOC
+ ! Number of threads for the PT2. Defaults to nproc; a positive integer in
+ ! the QP_NTHREADS_PT2 environment variable overrides it. A non-numeric or
+ ! non-positive value is ignored with a warning instead of aborting the run.
+ END_DOC
+ character*(32) :: env
+ integer        :: n_req, ierr
+
+ nthreads_pt2 = nproc
+ call getenv('QP_NTHREADS_PT2',env)
+ if (trim(env) /= '') then
+   ! n_req stays at 0 (rejected below) when the read fails
+   n_req = 0
+   read(env,*,iostat=ierr) n_req
+   if (ierr /= 0) n_req = 0
+   if (n_req > 0) then
+     nthreads_pt2 = n_req
+     call write_int(6,nthreads_pt2,'Target number of threads for PT2')
+   else
+     print *, 'Warning: invalid QP_NTHREADS_PT2 ignored: ', trim(env)
+   endif
+ endif
+END_PROVIDER
+
--- a/src/cipsi_tc_bi_ortho/fock_diag.irp.f
+++ b/src/cipsi_tc_bi_ortho/fock_diag.irp.f
@ -0,0 +1,95 @@
+subroutine build_fock_tmp_tc(fock_diag_tmp,det_ref,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! Build the diagonal of the Fock matrix corresponding to a generator
+! determinant. $F_{00}$ is $\langle i|H|i \rangle = E_0$, stored in fock_diag_tmp(:,mo_num+1).
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: det_ref(Nint,2)
+  double precision, intent(out)  :: fock_diag_tmp(2,mo_num+1)   ! first index: 1 = alpha, 2 = beta
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: ne(2), i, j, ii, jj
+  double precision               :: E0
+
+  ! Compute Fock matrix diagonal elements
+  call bitstring_to_list_ab(det_ref,occ,Ne,Nint)   ! occ(:,s): occupied orbitals of spin s, Ne(s): their number
+
+  fock_diag_tmp = 0.d0
+  E0 = 0.d0
+  ! Abort if det_ref does not hold elec_alpha_num / elec_beta_num electrons.
+  if (Ne(1) /= elec_alpha_num) then
+    print *,  'Error in build_fock_tmp_tc (alpha)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)   ! NOTE(review): uses N_int, not the dummy Nint that dimensions det_ref - confirm they always match
+    stop -1
+  endif
+  if (Ne(2) /= elec_beta_num) then
+    print *, 'Error in build_fock_tmp_tc (beta)', Ne(1), Ne(2)
+    call debug_det(det_ref,N_int)
+    stop -1
+  endif
+
+  ! Occupied MOs
+  do ii=1,elec_alpha_num
+    i = occ(ii,1)
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      if (i==j) cycle
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)   ! each same-spin pair is visited twice, hence 0.5
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+      E0 = E0 + mo_two_e_integrals_jj(i,j)   ! alpha-beta term added to E0 here only (not in the beta loop)
+    enddo
+  enddo
+  do ii=1,elec_beta_num
+    i = occ(ii,2)
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    E0 = E0 + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      if (i==j) cycle
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+      E0 = E0 + 0.5d0*mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+
+  ! Virtual MOs
+  do i=1,mo_num
+    if (fock_diag_tmp(1,i) /= 0.d0) cycle   ! already filled above; NOTE(review): an exactly zero occupied entry would be refilled here
+    fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(1,i) = fock_diag_tmp(1,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  do i=1,mo_num
+    if (fock_diag_tmp(2,i) /= 0.d0) cycle
+    fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_one_e_integrals(i,i)
+    do jj=1,elec_beta_num
+      j = occ(jj,2)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj_anti(i,j)
+    enddo
+    do jj=1,elec_alpha_num
+      j = occ(jj,1)
+      fock_diag_tmp(2,i) = fock_diag_tmp(2,i) + mo_two_e_integrals_jj(i,j)
+    enddo
+  enddo
+  ! The extra slot mo_num+1 carries E0 for both spins.
+  fock_diag_tmp(1,mo_num+1) = E0
+  fock_diag_tmp(2,mo_num+1) = E0
+
+end
--- a/src/cipsi_tc_bi_ortho/get_d.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d.irp.f
--- a/src/cipsi_tc_bi_ortho/get_d0_good.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d0_good.irp.f
@ -0,0 +1,139 @@
+subroutine get_d0_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  !todo: indices/conjg should be okay for complex
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Adds to mat_r and mat_l the contribution of the determinant gen for every
+  ! allowed pair of particles (i,j) applied on mask.
+  !
+  ! mat_r(k,i,j) += coefs(k,1) * hij   (<alpha|H|psi> part)
+  ! mat_l(k,i,j) += coefs(k,2) * hji   (<phi|H|alpha> part)
+  !
+  ! hij / hji are taken from mo_bi_ortho_tc_two_e times get_phase_bi, except
+  ! when a new particle coincides with one of the orbitals read from p: the
+  ! determinant det is then built with apply_particles and the element is
+  ! computed by htilde_mu_mat_opt_bi_ortho_no_3e(det,gen) or (gen,det).
+  !
+  ! sp == 3 : one alpha and one beta particle; i runs over alpha, j over beta.
+  ! sp /= 3 : two particles of spin sp; only i < j is filled.
+  !
+  ! Pairs rejected by bannedOrb or banned(:,:,1) are skipped.
+  ! The argument h is not used by this routine.
+  END_DOC
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  integer :: k, h1, h2, p1, p2, puti, putj, mm
+  double precision :: phase
+  double precision :: hij,hji
+  double precision, external :: get_phase_bi
+  logical :: ok
+
+  integer, parameter :: bant=1
+  ! Integral caches are only needed (and only allocated) in the AB branch
+  double precision, allocatable :: hij_cache1(:), hji_cache1(:)
+
+  if(sp == 3) then ! AB
+    allocate (hij_cache1(mo_num), hji_cache1(mo_num))
+    h1 = p(1,1)
+    h2 = p(1,2)
+    do p1=1, mo_num
+      if(bannedOrb(p1, 1)) cycle
+      ! Integrals for every p2 at fixed p1, in both orientations
+      do mm = 1, mo_num
+       hij_cache1(mm) = mo_bi_ortho_tc_two_e(mm,p1,h2,h1)
+       hji_cache1(mm) = mo_bi_ortho_tc_two_e(h2,h1,mm,p1)
+      enddo
+      !!!!!!!!!! <alpha|H|psi>
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! worth it?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache1(p2) * phase
+        end if
+        if (hij == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij  ! HOTSPOT
+        enddo
+      end do
+      !!!!!!!!!! <phi|H|alpha>
+      do p2=1, mo_num
+        if(bannedOrb(p2,2)) cycle
+        if(banned(p1, p2, bant)) cycle ! worth it?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hji = hji_cache1(p2) * phase
+        end if
+        if (hji == 0.d0) cycle
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji  ! HOTSPOT
+        enddo
+      end do
+    end do
+    deallocate(hij_cache1, hji_cache1)
+
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_num
+      if(bannedOrb(puti, sp)) cycle
+      !!!!!!!!!! <alpha|H|psi>
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! worth it?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(det,gen,N_int, hij)
+          if (hij == 0.d0) cycle
+        else
+          ! Antisymmetrized integral, read directly from the 4-index array
+          hij = (mo_bi_ortho_tc_two_e(puti, putj, p1, p2) -  mo_bi_ortho_tc_two_e(puti, putj, p2, p1))
+          if (hij == 0.d0) cycle
+          hij = (hij) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+        enddo
+      end do
+
+      !!!!!!!!!! <phi|H|alpha>
+      do putj=puti+1, mo_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! worth it?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call htilde_mu_mat_opt_bi_ortho_no_3e(gen,det,N_int, hji)
+          if (hji == 0.d0) cycle
+        else
+          hji = (mo_bi_ortho_tc_two_e( p1, p2, puti, putj) -  mo_bi_ortho_tc_two_e( p2, p1, puti, putj))
+          if (hji == 0.d0) cycle
+          hji = (hji) * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+        end if
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+        enddo
+      end do
+    end do
+  end if
+
+end
+
--- a/src/cipsi_tc_bi_ortho/get_d1_good.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d1_good.irp.f
@ -0,0 +1,454 @@
+subroutine get_d1_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  !todo: indices should be okay for complex?
+  use bitmasks
+  implicit none
+  ! Adds to mat_r the terms hij*coefs(:,1) and to mat_l the terms hji*coefs(:,2) for the allowed particle pairs.
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in)  :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision, external     :: get_phase_bi
+  double precision, external     :: mo_two_e_integral_complex   ! NOTE(review): declared but not called in this routine
+  logical                        :: ok
+  ! p(0,s) is used below as the number of entries p(1:,s) for spin s; h(1,s) supplies hfix.
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k, l, mm
+
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+  ! turn3(:,i): the two indices of 1..3 other than i
+  integer                        :: bant
+  double precision, allocatable :: hij_cache(:,:)
+  double precision               :: hij, tmp_rowij(N_states, mo_num), tmp_rowij2(N_states, mo_num)
+  double precision, allocatable :: hji_cache(:,:)
+  double precision               :: hji, tmp_rowji(N_states, mo_num), tmp_rowji2(N_states, mo_num)
+!  PROVIDE mo_integrals_map N_int
+!  print*,'in get_d1_new'
+!  call debug_det(gen,N_int)
+!  print*,'coefs',coefs(1,:)
+  ! lbanned = bannedOrb plus every orbital listed in p(:,1) and p(:,2)
+  allocate (lbanned(mo_num, 2))
+  allocate (hij_cache(mo_num,2))
+  allocate (hji_cache(mo_num,2))
+  lbanned = bannedOrb
+
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+  ! ma: spin with at least two entries in p (1 unless p(0,2) >= 2); mi: the other spin
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)   ! mi-spin orbital kept fixed while putj scans the ma-spin orbitals
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+!      call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      do mm = 1, mo_num
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+      enddo
+      !! <alpha|H|psi>
+      tmp_rowij = 0.d0
+      do putj=1, hfix-1   ! putj below hfix (putj == hfix is never visited)
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num   ! putj above hfix: opposite sign, putj and hfix swapped in get_phase_bi
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,putj) = tmp_rowij(k,putj) + hij * coefs(k,1)
+          enddo
+        endif
+      end do
+      ! ma == 1: putj is the second index of mat_r and puti the third; otherwise puti is second and putj third
+      if(ma == 1) then
+        mat_r(1:N_states,1:mo_num,puti) = mat_r(1:N_states,1:mo_num,puti) + tmp_rowij(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k,puti,l) = mat_r(k,puti,l) + tmp_rowij(k,l)
+          enddo
+        enddo
+      end if
+
+      !! <phi|H|alpha>
+      tmp_rowji = 0.d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hji = hji_cache(putj,1) - hji_cache(putj,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hji = hji_cache(putj,2) - hji_cache(putj,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,putj) = tmp_rowji(k,putj) + hji * coefs(k,2)
+          enddo
+        endif
+      end do
+
+      if(ma == 1) then
+        mat_l(1:N_states,1:mo_num,puti) = mat_l(1:N_states,1:mo_num,puti) + tmp_rowji(1:N_states,1:mo_num)
+      else
+        do l=1,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k,puti,l) = mat_l(k,puti,l) + tmp_rowji(k,l)
+          enddo
+        enddo
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)   ! here the ma-spin orbital is fixed (p1 or p2) and puti scans the mi-spin orbitals
+    tmp_rowij  = 0.d0
+    tmp_rowij2 = 0.d0
+    tmp_rowji  = 0.d0
+    tmp_rowji2 = 0.d0
+!    call get_mo_two_e_integrals_complex(hfix,pfix,p1,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!    call get_mo_two_e_integrals_complex(hfix,pfix,p2,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+    do mm = 1, mo_num
+     hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p1)
+     hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,pfix,p2)
+     hji_cache(mm,1) = mo_bi_ortho_tc_two_e(pfix,p1,mm,hfix)
+     hji_cache(mm,2) = mo_bi_ortho_tc_two_e(pfix,p2,mm,hfix)
+    enddo
+    putj = p1
+    !! <alpha|H|psi>
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1)
+          enddo
+        endif
+      end if
+!     p2 fixed
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1)
+          enddo
+        endif
+      end if
+    end do
+    ! tmp_rowij belongs to the pairs containing p1, tmp_rowij2 to those containing p2
+    if(mi == 1) then
+      mat_r(:,:,p1) = mat_r(:,:,p1) + tmp_rowij(:,:)
+      mat_r(:,:,p2) = mat_r(:,:,p2) + tmp_rowij2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij(k,l)
+          mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij2(k,l)
+        enddo
+      enddo
+    end if
+
+    putj = p1
+    !! <phi|H|alpha>
+    do puti=1,mo_num !HOT
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hji = hji_cache(puti,2)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2)
+          enddo
+        endif
+      end if
+!     p2 fixed
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hji = hji_cache(puti,1)
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2)
+          enddo
+        endif
+      end if
+    end do
+
+    if(mi == 1) then
+      mat_l(:,:,p1) = mat_l(:,:,p1) + tmp_rowji(:,:)
+      mat_l(:,:,p2) = mat_l(:,:,p2) + tmp_rowji2(:,:)
+    else
+      do l=1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji(k,l)
+          mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji2(k,l)
+        enddo
+      enddo
+    end if
+
+  else  ! sp /= 3
+    ! Same-spin case: results are stored with the smaller orbital index first (see the split updates below).
+    if(p(0,ma) == 3) then
+      do i=1,3   ! puti = p(i,ma) in turn; p1, p2 are the two other entries (turn3)
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+!        call get_mo_two_e_integrals_complex(hfix,p1,p2,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!        call get_mo_two_e_integrals_complex(hfix,p2,p1,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+        do mm = 1, mo_num
+         hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,p2)
+         hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,p1)
+         hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,p2,mm,hfix)
+         hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,p1,mm,hfix)
+        enddo
+    !! <alpha|H|psi>
+        tmp_rowij = 0.d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_rowij(:,putj) = tmp_rowij(:,putj) + hij * coefs(:,1)
+          endif
+        end do
+        ! putj < puti goes to mat_r(:,putj,puti); putj >= puti goes to mat_r(:,puti,putj)
+        mat_r(:, :puti-1, puti) = mat_r(:, :puti-1, puti) + tmp_rowij(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, l) = mat_r(k, puti,l) + tmp_rowij(k,l)
+          enddo
+        enddo
+    !! <phi|H|alpha>
+        tmp_rowji = 0.d0
+        do putj=1,hfix-1
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hji = hji_cache(putj,1) - hji_cache(putj,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+        do putj=hfix+1,mo_num
+          if(banned(putj,puti,1)) cycle
+          if(lbanned(putj,ma)) cycle
+          hji = hji_cache(putj,2) - hji_cache(putj,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_rowji(:,putj) = tmp_rowji(:,putj) + hji * coefs(:,2)
+          endif
+        end do
+
+        mat_l(:, :puti-1, puti) = mat_l(:, :puti-1, puti) + tmp_rowji(:,:puti-1)
+        do l=puti,mo_num
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, l) = mat_l(k, puti,l) + tmp_rowji(k,l)
+          enddo
+        enddo
+      end do
+    else
+      hfix = h(1,mi)   ! hole and single particle come from spin mi, the pair p1,p2 from spin ma
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_rowij =  0.d0
+      tmp_rowij2 = 0.d0
+      tmp_rowji =  0.d0
+      tmp_rowji2 = 0.d0
+!      call get_mo_two_e_integrals_complex(hfix,p1,pfix,mo_num,hij_cache(1,1),mo_integrals_map,mo_integrals_map_2)
+!      call get_mo_two_e_integrals_complex(hfix,p2,pfix,mo_num,hij_cache(1,2),mo_integrals_map,mo_integrals_map_2)
+      do mm = 1, mo_num
+       hij_cache(mm,1) = mo_bi_ortho_tc_two_e(mm,hfix,p1,pfix)
+       hij_cache(mm,2) = mo_bi_ortho_tc_two_e(mm,hfix,p2,pfix)
+       hji_cache(mm,1) = mo_bi_ortho_tc_two_e(p1,pfix,mm,hfix)
+       hji_cache(mm,2) = mo_bi_ortho_tc_two_e(p2,pfix,mm,hfix)
+      enddo
+      putj = p2
+    !! <alpha|H|psi>
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_rowij(k,puti) = tmp_rowij(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_rowij2(k,puti) = tmp_rowij2(k,puti) + hij * coefs(k,1)
+            enddo
+          endif
+        end if
+      end do
+      mat_r(:,:p2-1,p2) = mat_r(:,:p2-1,p2) + tmp_rowij(:,:p2-1)   ! tmp_rowij pairs with p2, tmp_rowij2 with p1
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p2,l) = mat_r(k,p2,l) + tmp_rowij(k,l)
+        enddo
+      enddo
+      mat_r(:,:p1-1,p1) = mat_r(:,:p1-1,p1) + tmp_rowij2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_r(k,p1,l) = mat_r(k,p1,l) + tmp_rowij2(k,l)
+        enddo
+      enddo
+
+
+    !! <phi|H|alpha>
+      putj = p2
+      do puti=1,mo_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hji = hji_cache(puti,1)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              tmp_rowji(k,puti) = tmp_rowji(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hji = hji_cache(puti,2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_rowji2(k,puti) = tmp_rowji2(k,puti) + hji * coefs(k,2)
+            enddo
+          endif
+        end if
+      end do
+      mat_l(:,:p2-1,p2) = mat_l(:,:p2-1,p2) + tmp_rowji(:,:p2-1)
+      do l=p2,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p2,l) = mat_l(k,p2,l) + tmp_rowji(k,l)
+        enddo
+      enddo
+      mat_l(:,:p1-1,p1) = mat_l(:,:p1-1,p1) + tmp_rowji2(:,:p1-1)
+      do l=p1,mo_num
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          mat_l(k,p1,l) = mat_l(k,p1,l) + tmp_rowji2(k,l)
+        enddo
+      enddo
+    end if
+  end if
+  deallocate(lbanned,hij_cache, hji_cache)   ! the MONO part below only needs bannedOrb and banned
+
+ !! MONO: pairs made only of orbitals listed in p; elements computed explicitly from det and gen
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1   ! same spin: visit each unordered pair once
+      do i2=ib,p(0,s2)
+        p1 = p(i1,s1)
+        p2 = p(i2,s2)
+        if(bannedOrb(p1, s1) .or. bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        ! gen is a selector; mask is ionized generator; det is alpha
+        ! hij is contribution to <psi|H|alpha>
+!        call i_h_j_complex(gen, det, N_int, hij)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(det, gen, N_int, hij)
+        call htilde_mu_mat_opt_bi_ortho_no_3e(gen, det, N_int, hji)
+        !DIR$ LOOP COUNT AVG(4)
+        do k=1,N_states
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * dconjg(hij)
+          mat_r(k, p1, p2) = mat_r(k, p1, p2) + coefs(k,1) * hij
+          mat_l(k, p1, p2) = mat_l(k, p1, p2) + coefs(k,2) * hji
+        enddo
+      end do
+    end do
+end
+
--- a/src/cipsi_tc_bi_ortho/get_d2_good.irp.f
+++ b/src/cipsi_tc_bi_ortho/get_d2_good.irp.f
@ -0,0 +1,308 @@
+
+subroutine get_d2_new(gen, phasemask, bannedOrb, banned, mat_l, mat_r, mask, h, p, sp, coefs)
+  ! Accumulate into mat_r and mat_l the contributions whose matrix element
+  ! reduces to a single (same-spin: antisymmetrized) two-electron integral
+  ! mo_bi_ortho_tc_two_e multiplied by the phase returned by get_phase_bi.
+  !
+  ! For every allowed orbital pair (puti,putj) two quantities are built, in
+  ! two separate loops over the same pairs:
+  !   hij from mo_bi_ortho_tc_two_e(h1,h2,p1,p2) [- (h1,h2,p2,p1)]
+  !       -> mat_r(k,.,.) += coefs(k,1) * hij
+  !   hji from mo_bi_ortho_tc_two_e(p1,p2,h1,h2) [- (p2,p1,h1,h2)]
+  !       -> mat_l(k,.,.) += coefs(k,2) * hji
+  ! The two index orders are both needed because the code does not assume the
+  ! integral array to be symmetric under exchange of the (h1,h2) and (p1,p2) pairs.
+  !
+  ! Arguments (as used by the executable statements below):
+  !   gen, mask  : not referenced here (gen only appears in commented-out debug code)
+  !   phasemask  : forwarded to get_phase_bi
+  !   bannedOrb  : bannedOrb(orb,spin) = .true. skips that orbital
+  !   banned     : banned(orb1,orb2,bant) = .true. skips that pair
+  !   mat_l,mat_r: (N_states,mo_num,mo_num) accumulators, updated in place
+  !   h          : h(i,spin) is read as the i-th hole of that spin
+  !   p          : p(0,spin) is the number of particles of that spin,
+  !                p(i,spin) the i-th particle
+  !   sp         : 1:(alpha,alpha)  2:(beta,beta)  3:(alpha,beta)
+  !   coefs      : coefs(k,1) weights mat_r, coefs(k,2) weights mat_l
+  !todo: indices/conjg should be correct for complex
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_num, 2), banned(mo_num, mo_num,2)
+  double precision, intent(in) :: coefs(N_states,2)
+  double precision, intent(inout) :: mat_r(N_states, mo_num, mo_num)
+  double precision, intent(inout) :: mat_l(N_states, mo_num, mo_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+
+  double precision, external :: get_phase_bi
+
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: phase
+  double precision :: hij,hji
+
+  ! Complement tables:
+  !   turn2d(:,i,j) (i<j) : the two indices of {1,2,3,4} other than i and j
+  !   turn2(i)            : the index of {1,2} other than i
+  !   turn3(:,i)          : the two indices of {1,2,3} other than i
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+
+  ! bant selects the third index of banned(:,:,bant) in the sp == 3 branch
+  integer :: bant
+  bant = 1
+!  print*, 'in get_d2_new'
+!  call debug_det(gen,N_int)
+!  print*,'coefs',coefs(1,:)
+
+  tip = p(0,1) * p(0,2) ! number of alpha particles times number of beta particles
+
+  ma = sp !1:(alpha,alpha); 2:(b,b); 3:(a,b)
+  if(p(0,1) > p(0,2)) ma = 1 ! more alpha particles than beta particles
+  if(p(0,1) < p(0,2)) ma = 2 ! fewer alpha particles than beta particles
+  ! mi is the spin index other than ma when ma is 1 or 2
+  mi = mod(ma, 2) + 1
+
+  if(sp == 3) then ! if one alpha and one beta xhole 
+    !(where xholes refer to the ionizations from the generator, not the holes occupied in the ionized generator)
+    if(ma == 2) bant = 2 ! if more beta particles than alpha particles
+
+    if(tip == 3) then ! if 3 of one particle spin and 1 of the other particle spin
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+
+      !! <alpha|H|psi>
+      do i = 1, 3    ! loop over all 3 combinations of 2 particles with spin ma
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        
+     ! |G> = |psi_{gen,i}>
+     ! |G'> = a_{x1} a_{x2} |G>
+     ! |alpha> = a_{puti}^{\dagger} a_{putj}^{\dagger} |G'>
+     ! |alpha> = t_{x1,x2}^{puti,putj} |G>
+     ! hij = <psi_{selectors,i}|H|alpha>
+     ! |alpha> = t_{p1,p2}^{h1,h2}|psi_{selectors,i}>
+        !todo: <i|H|j>  =  (<h1,h2|p1,p2> - <h1,h2|p2,p1>) * phase
+        !    <psi|H|j> +=  dconjg(c_i) * <i|H|j>
+        !      <j|H|i>  =  (<p1,p2|h1,h2> - <p2,p1|h1,h2>) * phase
+        !    <j|H|psi> +=  <j|H|i> * c_i
+!        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2, p1, h1, h2)
+
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e( h1, h2, p2, p1)
+        if (hij == 0.d0) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!        hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij
+          enddo
+        else            ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        end if
+      end do
+      !! <phi|H|alpha>
+      do i = 1, 3    ! loop over all 3 combinations of 2 particles with spin ma
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e( p2, p1, h1, h2)
+        if (hji == 0.d0) cycle
+        hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+
+        if(ma == 1) then ! if particle spins are (alpha,alpha,alpha,beta), then puti is beta and putj is alpha
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        else            ! if particle spins are (beta,beta,beta,alpha), then puti is alpha and putj is beta
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end do
+    else ! if 2 alpha and 2 beta particles
+      h1 = h(1,1)
+      h2 = h(1,2)
+      !! <alpha|H|psi>
+      do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+    ! hij = <psi_{selectors,i}|H|alpha> 
+!          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2 )
+          if (hij /= 0.d0) then
+            ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!            hij = dconjg(hij) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            hij = hij * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+            enddo
+          endif
+        end do
+      end do
+      !! <phi|H|alpha>
+      do j = 1,2 ! loop over all 4 combinations of one alpha and one beta particle
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+          hji = mo_bi_ortho_tc_two_e( p1, p2, h1, h2)
+          if (hji /= 0.d0) then
+            hji = hji * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+            !DIR$ LOOP COUNT AVG(4)
+            do k=1,N_states
+              mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+            enddo
+          endif
+        end do
+      end do
+    end if
+
+  else ! if holes are (a,a) or (b,b)
+    if(tip == 0) then ! if particles are (a,a,a,a) or (b,b,b,b)
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      !! <alpha|H|psi>
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+!          hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+          hij = mo_bi_ortho_tc_two_e(h1, h2, p1, p2) - mo_bi_ortho_tc_two_e(h1, h2, p2,p1 )
+          if (hij == 0.d0) cycle
+
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          hij = dconjg(hij) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          hij = hij * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) +coefs(k,1) * hij
+          enddo
+        end do
+      end do
+      !! <phi|H|alpha>
+      do i=1,3
+        puti = p(i, ma)
+        if(bannedOrb(puti,ma)) cycle
+        do j=i+1,4
+          putj = p(j, ma)
+          if(bannedOrb(putj,ma)) cycle
+          if(banned(puti,putj,1)) cycle
+          i1 = turn2d(1, i, j)
+          i2 = turn2d(2, i, j)
+          p1 = p(i1, ma)
+          p2 = p(i2, ma)
+          hji = mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1,h1, h2 )
+          if (hji == 0.d0) cycle
+          hji = hji * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) +coefs(k,2) * hji
+          enddo
+        end do
+      end do
+    else if(tip == 3) then ! if particles are (a,a,a,b) (ma=1,mi=2) or (a,b,b,b) (ma=2,mi=1)
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      !! <alpha|H|psi>
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+
+!        hij = mo_bi_ortho_tc_two_e(p1, p2, h1, h2)
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = mo_bi_ortho_tc_two_e(h1, h2,p1, p2 )
+        if (hij == 0.d0) cycle
+
+        ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!        hij = dconjg(hij) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        hij = hij * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        ! the smaller of (puti,putj) is always used as the second index of mat_r
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, putj, puti) = mat_r(k, putj, puti) + coefs(k,1) * hij
+          enddo
+        endif
+      end do
+      !! <phi|H|alpha>
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+        hji = mo_bi_ortho_tc_two_e(p1, p2,h1, h2)
+        if (hji == 0.d0) cycle
+        hji = hji * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        if (puti < putj) then
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        else
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, putj, puti) = mat_l(k, putj, puti) + coefs(k,2) * hji
+          enddo
+        endif
+      end do
+    else ! tip == 4  (a,a,b,b)
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+      !! <alpha|H|psi>
+!        hij = (mo_bi_ortho_tc_two_e(p1, p2, h1, h2) - mo_bi_ortho_tc_two_e(p2,p1, h1, h2))
+!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!! 
+        ! take the transpose of what's written above because later use the complex conjugate 
+        hij = (mo_bi_ortho_tc_two_e(h1, h2,p1, p2) - mo_bi_ortho_tc_two_e(h1, h2, p2,p1))
+        if (hij /= 0.d0) then
+          ! take conjugate to get contribution to <alpha|H|psi> instead of <psi|H|alpha>
+!          hij = dconjg(hij) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          hij = hij * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_r(k, puti, putj) = mat_r(k, puti, putj) + coefs(k,1) * hij
+          enddo
+        end if
+      !! <phi|H|alpha>
+        hji = (mo_bi_ortho_tc_two_e(p1, p2,h1, h2) - mo_bi_ortho_tc_two_e( p2,p1, h1, h2))
+        if (hji /= 0.d0) then
+          hji = hji * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+          !DIR$ LOOP COUNT AVG(4)
+          do k=1,N_states
+            mat_l(k, puti, putj) = mat_l(k, puti, putj) + coefs(k,2) * hji
+          enddo
+        end if
+      end if
+    end if
+  end if
+end
--- a/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f
+++ b/src/cipsi_tc_bi_ortho/lock_2rdm.irp.f
--- a/src/cipsi_tc_bi_ortho/pouet
+++ b/src/cipsi_tc_bi_ortho/pouet
@ -0,0 +1,33 @@
+
+! Debugging fragment, not a complete program unit (the opening `if` below is
+! never closed in this file). When the product psi_h_alpha*alpha_h_psi differs
+! from the product of the *_tmp values by more than 1.d-10, it prints both sets
+! of values and then, for each of the N_det determinants in psi_det, prints the
+! excitation degree with respect to det, the two htilde_mu_mat_bi_ortho_tot
+! elements, and their running sums accu_1 / accu_2.
+          if(dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp).gt.1.d-10)then
+!!!          print*,'---'
+!!!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!!!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+           call debug_det(det,N_int)
+            print*,dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
+            print*,'-- Good '
+            print*,   psi_h_alpha,    alpha_h_psi  
+            print*,'-- bad '
+            print*,psi_h_alpha_tmp,alpha_h_psi_tmp  
+            print*,'-- details good'
+          double precision :: accu_1, accu_2
+          accu_1 = 0.d0
+          accu_2 = 0.d0
+          do iii = 1, N_det
+            call get_excitation_degree( psi_det(1,1,iii), det, degree, N_int)
+            call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,iii), det, N_int, i_h_alpha)
+            call htilde_mu_mat_bi_ortho_tot(det, psi_det(1,1,iii), N_int, alpha_h_i)
+            print*,iii,degree,i_h_alpha,alpha_h_i
+            accu_1 += i_h_alpha
+            accu_2 += alpha_h_i
+            print*,accu_1,accu_2
+            
+          enddo
+!!!          if(dabs(psi_h_alpha*alpha_h_psi).gt.1.d-10)then
+!!!          print*,p1,p2
+!!!          print*,det(1,1), det(1,2)
+!!!          call debug_det(det,N_int)
+!!!          print*,psi_h_alpha    *alpha_h_psi,    psi_h_alpha,    alpha_h_psi  
+!!!          print*,psi_h_alpha_tmp*alpha_h_psi_tmp,psi_h_alpha_tmp,alpha_h_psi_tmp  
+!!!          print*, dabs(psi_h_alpha*alpha_h_psi - psi_h_alpha_tmp*alpha_h_psi_tmp),& 
+!!!                   psi_h_alpha    *alpha_h_psi,psi_h_alpha_tmp*alpha_h_psi_tmp
--- a/src/cipsi_tc_bi_ortho/pt2.irp.f
+++ b/src/cipsi_tc_bi_ortho/pt2.irp.f
@ -0,0 +1,89 @@
+subroutine pt2_tc_bi_ortho
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Single stochastic PT2 pass on the current TC bi-orthogonal wave function.
+!
+! Sequence of operations:
+!  1. print the diagonal of Fock_matrix_tc_mo_tot;
+!  2. set threshold_generators to 1.d0 and N_iter to 1;
+!  3. diagonalize with diagonalize_CI_tc_bi_ortho (PT2 printing off);
+!  4. if N_det > N_det_max, keep the first N_det_max determinants of the
+!     sorted generator list and diagonalize again;
+!  5. call ZMQ_pt2 once, using the energy of step 3/4 as denominator energy;
+!  6. diagonalize once more with PT2 printing on.
+  END_DOC
+  integer                        :: i,ndet
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical                        :: print_pt2
+  double precision               :: rss
+  double precision, external     :: memory_of_double
+  double precision               :: E_denom,E_tc,norm
+  double precision               :: relative_error
+
+  PROVIDE H_apply_buffer_allocated distributed_davidson mo_two_e_integrals_in_map
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  relative_error=PT2_relative_error
+
+  ! Sentinel values passed to the first diagonalization(s), before any PT2
+  ! has been computed.
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+  ! Truncate the wave function when it is larger than N_det_max.
+  if (N_det > N_det_max) then
+    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    if (s2_eig) then
+      call make_s2_eigenfunction
+    endif
+    print_pt2 = .False.
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  endif
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+  to_select = max(N_states_diag, to_select)
+
+  E_denom = E_tc ! TC Energy of the current wave function 
+  ! Start the PT2 from freshly allocated accumulators.
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+
+  N_iter += 1
+
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+
+  ! Release the accumulators explicitly, matching the pt2_alloc calls above.
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+
+end
+
--- a/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
+++ b/src/cipsi_tc_bi_ortho/pt2_stoch_routines.irp.f
@ -0,0 +1,869 @@
+BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
+ implicit none
+ BEGIN_DOC
+ ! Index of the state currently treated by the stochastic PT2.
+ ! Defaults to 1; ZMQ_pt2 uses it as the loop variable over the N_states states.
+ END_DOC
+ pt2_stoch_istate = 1
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ]
+&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ]
+  implicit none
+  BEGIN_DOC
+  ! pt2_n_tasks_max : upper bound used for the number of tasks per generator.
+  ! pt2_F(i)        : number of tasks generator i is split into. Four ranges of
+  !                   generators, delimited by entries of pt2_n_0, get
+  !                   decreasing values (later assignments override the default).
+  END_DOC
+  integer :: last_heavy, last_default, last_light
+
+  pt2_n_tasks_max = elec_alpha_num*elec_alpha_num + elec_alpha_num*elec_beta_num  - n_core_orb*2
+  pt2_n_tasks_max = min(pt2_n_tasks_max,1+N_det_generators/10000)
+  call write_int(6,pt2_n_tasks_max,'pt2_n_tasks_max')
+
+  ! Upper ends of the generator ranges.
+  last_heavy   = pt2_n_0(1+pt2_N_teeth/4)
+  last_default = pt2_n_0(pt2_N_teeth-pt2_N_teeth/4)
+  last_light   = pt2_n_0(pt2_N_teeth-pt2_N_teeth/10)
+
+  ! Default value everywhere, then override range by range.
+  pt2_F(:) = max(int(sqrt(real(pt2_n_tasks_max))),1)
+  pt2_F(1:last_heavy)                  = pt2_n_tasks_max*pt2_min_parallel_tasks
+  pt2_F(last_default+1:last_light)     = pt2_min_parallel_tasks
+  pt2_F(last_light+1:N_det_generators) = 1
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_N_teeth ]
+&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ]
+  implicit none
+  BEGIN_DOC
+  ! pt2_N_teeth            : number of comb teeth. It is 1 below 500 generators;
+  !                          otherwise the largest value in [2,100] accepted by
+  !                          testTeethBuilding, or 1 when none is accepted.
+  ! pt2_minDetInFirstTeeth : minimal number of determinants passed to
+  !                          testTeethBuilding as minF.
+  END_DOC
+  logical, external :: testTeethBuilding
+  integer :: n_try
+
+  ! Values kept when there are few generators or when no candidate is accepted.
+  pt2_minDetInFirstTeeth = 1
+  pt2_N_teeth = 1
+
+  if (N_det_generators >= 500) then
+    pt2_minDetInFirstTeeth = min(5, N_det_generators)
+    ! Scan candidates from the largest down and keep the first accepted one.
+    do n_try = 100, 2, -1
+      if (testTeethBuilding(pt2_minDetInFirstTeeth, n_try)) then
+        pt2_N_teeth = n_try
+        exit
+      endif
+    enddo
+  endif
+  call write_int(6,pt2_N_teeth,'Number of comb teeth')
+END_PROVIDER
+
+
+logical function testTeethBuilding(minF, N)
+  implicit none
+  ! Tell whether a comb with N teeth can be built.
+  !
+  ! The normalized squared coefficients of state pt2_stoch_istate are summed
+  ! into a cumulative weight cum_weight(0:N_det_generators). Scanning start
+  ! offsets from 0 up to N_det_generators - minF*N, the function returns
+  ! .true. as soon as one N-th of the weight remaining after `start` is at
+  ! least the weight carried by the next minF generators. It returns .false.
+  ! if that share drops to 1.d-3 or below, or if all offsets are exhausted.
+  !
+  !   minF : number of generators that must fit in the first tooth
+  !   N    : number of teeth tested
+  integer, intent(in)           :: minF, N
+
+  integer                       :: idx, start, last_start
+  double precision              :: cum_start, cum_end, tooth_width
+  double precision              :: weight_sum, inv_weight_sum, inv_n
+  double precision              :: rss
+  double precision, allocatable :: weight(:), cum_weight(:)
+  double precision, external    :: memory_of_double
+
+  rss = memory_of_double(2*N_det_generators+1)
+  call check_mem(rss,irp_here)
+
+  allocate(weight(N_det_generators), cum_weight(0:N_det_generators))
+
+  ! Squared coefficients, summed from the last generator to the first.
+  weight_sum = 0.d0
+  do idx = N_det_generators, 1, -1
+    weight(idx) = psi_coef_sorted_tc_gen(idx,pt2_stoch_istate) * &
+                  psi_coef_sorted_tc_gen(idx,pt2_stoch_istate)
+    weight_sum  = weight_sum + weight(idx)
+  enddo
+
+  inv_weight_sum = 1.d0/weight_sum
+  weight(:) = weight(:) * inv_weight_sum
+
+  ! Cumulative sum accumulated from -1 and shifted by +1 afterwards.
+  cum_weight(0) = -1.d0
+  do idx = 1, N_det_generators
+    cum_weight(idx) = cum_weight(idx-1) + weight(idx)
+  enddo
+  cum_weight(:) = cum_weight(:) + 1.d0
+  deallocate(weight)
+
+  testTeethBuilding = .false.
+  last_start = N_det_generators - minF * N
+  inv_n      = 1.d0/dble(N)
+
+  start = 0
+  do
+    cum_start   = cum_weight(start)
+    cum_end     = cum_weight(start + minF)
+    tooth_width = (1d0 - cum_start) * inv_n
+
+    if (dabs(tooth_width) <= 1.d-3) exit
+
+    if (tooth_width >= cum_end - cum_start) then
+      testTeethBuilding = .true.
+      exit
+    endif
+
+    start = start + 1
+    if (start > last_start) exit
+  enddo
+  deallocate(cum_weight)
+
+end function
+
+
+
+subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+  BEGIN_DOC
+  ! Driver of the stochastic PT2 over the ZMQ task server.
+  !
+  ! When the wave function is tiny (N_det <= max(4,N_states)) or fewer than two
+  ! comb teeth are available, the work is delegated to ZMQ_selection. Otherwise,
+  ! for each state in turn: the state-average weight is set to that state only,
+  ! the data needed by the workers is put on the ZMQ server, one task is
+  ! queued per (fragment, generator) pair, and an OpenMP region is opened in
+  ! which thread 0 runs pt2_collector and the other threads run
+  ! pt2_slave_inproc.
+  !
+  !   E              : denominator energies; E(istate) is given to the collector
+  !   pt2_data       : PT2 results, updated in place (rpt2 = pt2/(1+overlap))
+  !   pt2_data_err   : statistical errors, updated in place
+  !   relative_error : target passed to pt2_collector
+  !   N_in           : number of determinants to select; when > 0 the content
+  !                    of the selection buffer is appended to the wave function
+  !                    through fill_H_apply_buffer_no_selection
+  END_DOC
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket, zmq_socket_pull
+  integer, intent(in)            :: N_in
+  double precision, intent(in)   :: relative_error, E(N_states)
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err
+
+  integer                        :: i, N
+  integer                        :: j,k,ipos,ifirst
+  integer                        :: nproc_target, ii
+  double precision               :: state_average_weight_save(N_states)
+  double precision               :: mem_collector, mem, rss
+  character(300000)              :: task
+  type(selection_buffer)         :: b
+
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
+  integer, external              :: zmq_put_psi
+  integer, external              :: zmq_put_N_det_generators
+  integer, external              :: zmq_put_N_det_selectors
+  integer, external              :: zmq_put_dvector
+  integer, external              :: zmq_put_ivector
+  integer, external              :: add_task_to_taskserver
+  integer, external              :: zmq_set_running
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp_tc psi_det_sorted_tc
+  PROVIDE psi_det_hii selection_weight pseudo_sym
+  PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+  PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+  if (h0_type == 'CFG') then
+    PROVIDE psi_configuration_hii det_to_configuration
+  endif
+
+  if (N_det <= max(4,N_states) .or. pt2_N_teeth < 2) then
+    print*,'ZMQ_selection'
+    call ZMQ_selection(N_in, pt2_data)
+  else
+    print*,'else ZMQ_selection'
+
+    ! The selection buffer is created with sizes N and 2*N. Check in 64-bit
+    ! arithmetic that 2*N fits in a default integer BEFORE forming N, so that
+    ! a wrapped-around product cannot slip through the test.
+    if (int(max(N_in,1),8)*int(N_states,8)*2_8 > int(huge(1),8)) then
+      print *,  irp_here, ': integer too large'
+      stop -1
+    endif
+    N = max(N_in,1) * N_states
+    state_average_weight_save(:) = state_average_weight(:)
+    call create_selection_buffer(N, N*2, b)
+    ASSERT (associated(b%det))
+    ASSERT (associated(b%val))
+
+    do pt2_stoch_istate=1,N_states
+      ! Only the current state contributes to the state average.
+      state_average_weight(:) = 0.d0
+      state_average_weight(pt2_stoch_istate) = 1.d0
+      TOUCH state_average_weight pt2_stoch_istate selection_weight
+
+      PROVIDE nproc pt2_F mo_two_e_integrals_in_map mo_one_e_integrals pt2_w
+      PROVIDE pt2_u pt2_J pt2_R
+      call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+
+      ! Publish the data read by the workers.
+      if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+        stop 'Unable to put psi on ZMQ server'
+      endif
+      if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_generators on ZMQ server'
+      endif
+      if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_selectors on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+        stop 'Unable to put energy on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+        stop 'Unable to put state_average_weight on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+        stop 'Unable to put selection_weight on ZMQ server'
+      endif
+      if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then
+        stop 'Unable to put pt2_stoch_istate on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+        stop 'Unable to put threshold_generators on ZMQ server'
+      endif
+
+      ifirst=0
+
+      ! Count the generators split into more than one task (for the log only).
+      ipos=0
+      do i=1,N_det_generators
+        if (pt2_F(i) > 1) then
+          ipos += 1
+        endif
+      enddo
+      call write_int(6,sum(pt2_F),'Number of tasks')
+      call write_int(6,ipos,'Number of fragmented tasks')
+
+      ! Each task is a 30-character record "fragment generator N_in|". Records
+      ! are packed in `task` and flushed to the server when the string is full.
+      ipos=1
+      do i= 1, N_det_generators
+        do j=1,pt2_F(pt2_J(i))
+          write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, pt2_J(i), N_in
+          ipos += 30
+          if (ipos > 300000-30) then
+            if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+              stop 'Unable to add task to task server'
+            endif
+            ipos=1
+            ! Mark the job as running as soon as the first batch is queued.
+            if (ifirst == 0) then
+              ifirst=1
+              if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+                print *,  irp_here, ': Failed in zmq_set_running'
+              endif
+            endif
+          endif
+        end do
+      enddo
+      if (ipos > 1) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+      endif
+
+      if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+        print *,  irp_here, ': Failed in zmq_set_running'
+      endif
+
+
+      call resident_memory(rss)
+
+      ! Memory estimate (Gb) for the collector thread.
+      mem_collector = 8.d0 *                  & ! bytes
+            ( 1.d0*pt2_n_tasks_max            & ! task_id, index
+            + 0.635d0*N_det_generators        & ! f,d
+            + pt2_n_tasks_max*pt2_type_size(N_states) & ! pt2_data_task
+            + N_det_generators*pt2_type_size(N_states)  & ! pt2_data_I
+            + 4.d0*(pt2_N_teeth+1)            & ! S, S2, T2, T3
+            + 1.d0*(N_int*2.d0*N + N)         & ! selection buffer
+            + 1.d0*(N_int*2.d0*N + N)         & ! sort selection buffer
+            ) / 1024.d0**3
+
+      nproc_target = nthreads_pt2
+      ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+      ! Lower the number of worker threads until the estimate fits in
+      ! qp_max_mem; with zero threads left, check_mem decides.
+      do
+        mem = mem_collector +                   & !
+              nproc_target * 8.d0 *             & ! bytes
+              ( 0.5d0*pt2_n_tasks_max           & ! task_id
+              + 64.d0*pt2_n_tasks_max           & ! task
+              + pt2_type_size(N_states)*pt2_n_tasks_max*N_states   & ! pt2, variance, overlap
+              + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+              + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+              + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                  ! prefullinteresting, fullinteresting
+              + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+              + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+              ) / 1024.d0**3
+
+        if (nproc_target == 0) then
+          call check_mem(mem,irp_here)
+          nproc_target = 1
+          exit
+        endif
+
+        if (mem+rss < qp_max_mem) then
+          exit
+        endif
+
+        nproc_target = nproc_target - 1
+
+      enddo
+      call write_int(6,nproc_target,'Number of threads for PT2')
+      call write_double(6,mem,'Memory (Gb)')
+
+      call omp_set_max_active_levels(1)
+
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+      print '(A)', ' Samples          Energy                Variance               Norm^2          Seconds'
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+      PROVIDE global_selection_buffer
+
+      ! Thread 0 collects the results, the nproc_target others are workers.
+      !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1)            &
+          !$OMP  PRIVATE(i)
+      i = omp_get_thread_num()
+      if (i==0) then
+
+        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, pt2_data, pt2_data_err, b, N)
+        pt2_data % rpt2(pt2_stoch_istate) =  &
+          pt2_data % pt2(pt2_stoch_istate)/(1.d0+pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+        !TODO : We should use here the correct formula for the error of X/Y
+        pt2_data_err % rpt2(pt2_stoch_istate) =  &
+          pt2_data_err % pt2(pt2_stoch_istate)/(1.d0 + pt2_data % overlap(pt2_stoch_istate,pt2_stoch_istate))
+
+      else
+        call pt2_slave_inproc(i)
+      endif
+      !$OMP END PARALLEL
+      call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+      call omp_set_max_active_levels(8)
+
+      print '(A)', '========== ======================= ===================== ===================== ==========='
+
+      do k=1,N_states
+        pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
+      enddo
+      SOFT_TOUCH pt2_overlap
+
+    enddo
+    FREE pt2_stoch_istate
+
+    ! Symmetrize overlap
+    do j=2,N_states
+     do i=1,j-1
+       pt2_overlap(i,j) = 0.5d0 * (pt2_overlap(i,j) + pt2_overlap(j,i))
+       pt2_overlap(j,i) = pt2_overlap(i,j)
+     enddo
+    enddo
+
+    print *, 'Overlap of perturbed states:'
+    do k=1,N_states
+      print *, pt2_overlap(k,:)
+    enddo
+    print *, '-------'
+
+    ! Append the selected determinants to the wave function.
+    if (N_in > 0) then
+      b%cur = min(N_in,b%cur)
+      if (s2_eig) then
+        call make_selection_buffer_s2(b)
+      else
+        call remove_duplicates_in_selection_buffer(b)
+      endif
+      call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+    endif
+    call delete_selection_buffer(b)
+
+    ! Restore the state-average weights saved before the loop over states.
+    state_average_weight(:) = state_average_weight_save(:)
+    TOUCH state_average_weight
+    call update_pt2_and_variance_weights(pt2_data, N_states)
+  endif
+
+
+end subroutine
+
+
+subroutine pt2_slave_inproc(i)
+  ! In-process PT2 worker: forwards to run_pt2_slave with 1 as first argument,
+  ! the caller-supplied index i, and pt2_e0_denominator as the energy.
+  ! ZMQ_pt2 calls it from every OpenMP thread whose thread number is non-zero,
+  ! passing that thread number as i.
+  implicit none
+  integer, intent(in)            :: i
+
+  PROVIDE global_selection_buffer
+  call run_pt2_slave(1,i,pt2_e0_denominator)
+end
+
+
+subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_err, b, N_)  ! pulls worker results and updates the running PT2 estimate and its error
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull  ! socket read through pull_pt2_results
+  double precision, intent(in)   :: relative_error, E  ! relative_error: bound on error/|pt2| for early stop; E: added to the printed pt2
+  type(pt2_type), intent(inout)  :: pt2_data, pt2_data_err  ! running estimate and its statistical error (state pt2_stoch_istate)
+  type(selection_buffer), intent(inout) :: b  ! receives determinants pulled from the workers; sorted on exit
+  integer, intent(in)            :: N_  ! size used to create the scratch buffer b2
+
+  type(pt2_type), allocatable    :: pt2_data_task(:)  ! per-task results of the last pulled message
+  type(pt2_type), allocatable    :: pt2_data_I(:)  ! accumulated contribution of each generator
+  type(pt2_type), allocatable    :: pt2_data_S(:)  ! per-tooth sums over the combs
+  type(pt2_type), allocatable    :: pt2_data_S2(:)  ! per-tooth sums of squares over the combs
+  type(pt2_type)                 :: pt2_data_teeth  ! accumulator used while walking the teeth of one comb
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer, external :: zmq_delete_tasks_async_send
+  integer, external :: zmq_delete_tasks_async_recv
+  integer, external :: zmq_abort
+  integer, external :: pt2_find_sample_lr
+
+  PROVIDE pt2_stoch_istate
+
+  integer :: more, n, i, p, c, t, n_tasks, U  ! n: position in pt2_J; c: pt2_R(n); t: tooth counter; U: number of leading generators completed
+  integer, allocatable :: task_id(:)
+  integer, allocatable :: index(:)
+
+  double precision :: v, x, x2, x3, avg, avg2, avg3(N_states), eqt, E0, v0, n0(N_states)  ! E0, v0, n0: exactly summed part of pt2, variance, overlap
+  double precision :: eqta(N_states)
+  double precision :: time, time1, time0  ! time0: start; time1: time of the last printed line
+
+  integer, allocatable :: f(:)  ! f(i): results still expected for generator i (starts at pt2_F(i))
+  logical, allocatable :: d(:)  ! d(i): all results of generator i have been received
+  logical :: do_exit, stop_now, sending
+  logical, external :: qp_stop
+  type(selection_buffer) :: b2  ! scratch buffer filled by each pull
+
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  sending =.False.
+
+  rss  = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
+  rss += memory_of_double(N_states*N_det_generators)*3.d0
+  rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0
+  rss += memory_of_double(pt2_N_teeth+1)*4.d0
+  call check_mem(rss,irp_here)
+
+  ! If an allocation is added here, the estimate of the memory should also be
+  ! updated in ZMQ_pt2
+  allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
+  allocate(d(N_det_generators+1))  ! one extra element, kept .false., ends the scan over d(U+1)
+  allocate(pt2_data_task(pt2_n_tasks_max))
+  allocate(pt2_data_I(N_det_generators))
+  allocate(pt2_data_S(pt2_N_teeth+1))
+  allocate(pt2_data_S2(pt2_N_teeth+1))
+
+
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N_, N_*2, b2)
+
+
+  pt2_data % pt2(pt2_stoch_istate) = -huge(1.)  ! placeholder values until the first estimate is available
+  pt2_data_err % pt2(pt2_stoch_istate) = huge(1.)
+  pt2_data % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data_err % variance(pt2_stoch_istate) = huge(1.)
+  pt2_data % overlap(:,pt2_stoch_istate) = 0.d0
+  pt2_data_err % overlap(:,pt2_stoch_istate) = huge(1.)
+  n = 1
+  t = 0
+  U = 0
+  do i=1,pt2_n_tasks_max
+    call pt2_alloc(pt2_data_task(i),N_states)
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_alloc(pt2_data_S(i),N_states)
+    call pt2_alloc(pt2_data_S2(i),N_states)
+  enddo
+  do i=1,N_det_generators
+    call pt2_alloc(pt2_data_I(i),N_states)
+  enddo
+  f(:) = pt2_F(:)
+  d(:) = .false.
+  n_tasks = 0
+  E0 = E
+  v0 = 0.d0
+  n0(:) = 0.d0
+  more = 1
+  call wall_time(time0)
+  time1 = time0
+
+  do_exit = .false.
+  stop_now = .false.
+  do while (n <= N_det_generators)
+    if(f(pt2_J(n)) == 0) then  ! every result of the n-th scheduled generator has arrived
+      d(pt2_J(n)) = .true.
+      do while(d(U+1))  ! advance U over the contiguous run of completed generators
+        U += 1
+      end do
+
+      ! Deterministic part: when U reaches the start of the next tooth, re-sum generators 1..pt2_n_0(t) exactly
+      do while(t <= pt2_N_teeth)
+        if(U >= pt2_n_0(t+1)) then
+          t=t+1
+          E0 = 0.d0
+          v0 = 0.d0
+          n0(:) = 0.d0
+          do i=pt2_n_0(t),1,-1
+            E0 += pt2_data_I(i) % pt2(pt2_stoch_istate)
+            v0 += pt2_data_I(i) % variance(pt2_stoch_istate)
+            n0(:) += pt2_data_I(i) % overlap(:,pt2_stoch_istate)
+          end do
+        else
+          exit
+        end if
+      end do
+
+      ! Add Stochastic part (only when pt2_R(n) > 0, i.e. a comb count is attached to position n)
+      c = pt2_R(n)
+      if(c > 0) then
+
+        call pt2_alloc(pt2_data_teeth,N_states)
+        do p=pt2_N_teeth, 1, -1  ! last tooth first, so that S(p) holds the comb sum over teeth p..pt2_N_teeth
+          v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
+          i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
+          v = pt2_W_T / pt2_w(i)  ! weight of the sampled generator
+          call pt2_add ( pt2_data_teeth,  v,  pt2_data_I(i) )
+          call pt2_add ( pt2_data_S(p),  1.d0,  pt2_data_teeth )
+          call pt2_add2( pt2_data_S2(p), 1.d0,  pt2_data_teeth )
+        enddo
+        call pt2_dealloc(pt2_data_teeth)
+
+        avg  = E0 + pt2_data_S(t) % pt2(pt2_stoch_istate) / dble(c)  ! exact part + average over the c combs
+        avg2 = v0 + pt2_data_S(t) % variance(pt2_stoch_istate) / dble(c)
+        avg3(:) = n0(:) + pt2_data_S(t) % overlap(:,pt2_stoch_istate) / dble(c)
+        if ((avg /= 0.d0) .or. (n == N_det_generators) ) then  ! early stop allowed only with a non-zero estimate or at the last generator
+          do_exit = .true.
+        endif
+        if (qp_stop()) then
+          stop_now = .True.
+        endif
+        pt2_data % pt2(pt2_stoch_istate) = avg
+        pt2_data % variance(pt2_stoch_istate) = avg2
+        pt2_data % overlap(:,pt2_stoch_istate) = avg3(:)
+        call wall_time(time)
+        ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
+        if(c > 2) then  ! error bars are only computed with more than two combs
+          eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % pt2(pt2_stoch_istate) = eqt
+
+          eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          pt2_data_err % variance(pt2_stoch_istate) = eqt
+
+          eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
+          eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
+          pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)
+
+
+          if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then  ! report at most once per second, and always at the end
+            time1 = time
+            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, &
+              pt2_data     % pt2(pt2_stoch_istate) +E, &
+              pt2_data_err % pt2(pt2_stoch_istate), &
+              pt2_data     % variance(pt2_stoch_istate), &
+              pt2_data_err % variance(pt2_stoch_istate), &
+              pt2_data     % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              pt2_data_err % overlap(pt2_stoch_istate,pt2_stoch_istate), &
+              time-time0
+            if (stop_now .or. (                                      &
+                  (do_exit .and. (dabs(pt2_data_err % pt2(pt2_stoch_istate)) /    &
+                  (1.d-20 + dabs(pt2_data % pt2(pt2_stoch_istate)) ) <= relative_error))) ) then
+              if (zmq_abort(zmq_to_qp_run_socket) == -1) then  ! send the abort signal, retried once after 10 s
+                call sleep(10)
+                if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                  print *, irp_here, ': Error in sending abort signal (2)'
+                endif
+              endif
+            endif
+          endif
+        endif
+      end if
+      n += 1
+    else if(more == 0) then  ! more is returned by zmq_delete_tasks_async_recv; presumably 0 means no task is left - TODO confirm
+      exit
+    else
+      call pull_pt2_results(zmq_socket_pull, index, pt2_data_task, task_id, n_tasks, b2)  ! read the next worker message
+      if(n_tasks > pt2_n_tasks_max)then
+       print*,'PB !!!'
+       print*,'If you see this, send a bug report with the following content'
+       print*,irp_here
+       print*,'n_tasks,pt2_n_tasks_max = ',n_tasks,pt2_n_tasks_max
+       stop -1
+      endif
+      if (zmq_delete_tasks_async_send(zmq_to_qp_run_socket,task_id,n_tasks,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (send)'
+      endif
+      do i=1,n_tasks
+        if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then
+         print*,'PB !!!'
+         print*,'If you see this, send a bug report with the following content'
+         print*,irp_here
+         print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
+         stop -1
+        endif
+        call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i))
+        f(index(i)) -= 1  ! one result fewer expected for this generator
+      end do
+      do i=1, b2%cur
+        ! We assume the pulled buffer is sorted
+        if (b2%val(i) > b%mini) exit
+        call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      end do
+      if (zmq_delete_tasks_async_recv(zmq_to_qp_run_socket,more,sending) == -1) then
+          stop 'PT2: Unable to delete tasks (recv)'
+      endif
+    end if
+  end do
+  do i=1,N_det_generators
+    call pt2_dealloc(pt2_data_I(i))
+  enddo
+  do i=1,pt2_N_teeth+1
+    call pt2_dealloc(pt2_data_S(i))
+    call pt2_dealloc(pt2_data_S2(i))
+  enddo
+  do i=1,pt2_n_tasks_max
+    call pt2_dealloc(pt2_data_task(i))
+  enddo
+!print *,  'deleting b2'
+  call delete_selection_buffer(b2)
+!print *,  'sorting b'
+  call sort_selection_buffer(b)
+!print *,  'done'
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+
+end subroutine
+
+
+integer function pt2_find_sample(v, w)
+  implicit none
+  BEGIN_DOC
+  ! Looks up v in the whole table w(0:N_det_generators) by delegating to
+  ! pt2_find_sample_lr with the bracket [0, N_det_generators].
+  END_DOC
+  double precision, intent(in) :: v
+  double precision, intent(in) :: w(0:N_det_generators)
+  integer, external            :: pt2_find_sample_lr
+  integer                      :: first, last
+
+  first = 0
+  last  = N_det_generators
+  pt2_find_sample = pt2_find_sample_lr(v, w, first, last)
+end function
+
+
+integer function pt2_find_sample_lr(v, w, l_in, r_in)
+  implicit none
+  BEGIN_DOC
+  ! Bisection in the table w, starting from the bracket [l_in, r_in].
+  ! The bracket is halved until it is at most one entry wide: the lower end
+  ! moves to the midpoint when w(mid) < v, the upper end otherwise.
+  ! From the final upper end, the result is then pushed forward over all
+  ! following entries (up to N_det_generators) that hold the same value of w,
+  ! and the last such index is returned.
+  END_DOC
+  double precision, intent(in) :: v
+  double precision, intent(in) :: w(0:N_det_generators)
+  integer, intent(in)          :: l_in, r_in
+  integer                      :: lower, upper, mid, found
+
+  lower = l_in
+  upper = r_in
+
+  ! Halve the bracket
+  do
+    if (upper - lower <= 1) exit
+    mid = ishft(upper + lower, -1)
+    if (v > w(mid)) then
+      lower = mid
+    else
+      upper = mid
+    end if
+  end do
+
+  ! Skip over the run of entries equal to w(upper)
+  found = upper
+  do while (found < N_det_generators)
+    if (w(found+1) /= w(upper)) exit
+    found = found + 1
+  end do
+
+  pt2_find_sample_lr = found
+end function
+
+
+BEGIN_PROVIDER [ integer, pt2_n_tasks ]
+ implicit none
+ BEGIN_DOC
+ ! Number of parallel tasks for the Monte Carlo; set equal to N_det_generators
+ END_DOC
+ pt2_n_tasks = N_det_generators
+END_PROVIDER
+
+BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
+  implicit none
+  BEGIN_DOC
+  ! Random numbers produced by the intrinsic generator after seeding it with
+  ! the fixed sequence 1, 2, ..., m, where m is the seed size reported by
+  ! random_seed.
+  END_DOC
+  integer              :: seed_size, k
+  integer, allocatable :: fixed_seed(:)
+
+  ! Install the same seed every time before drawing
+  call random_seed(size=seed_size)
+  allocate(fixed_seed(seed_size))
+  fixed_seed(:) = (/ (k, k=1,seed_size) /)
+  call random_seed(put=fixed_seed)
+  deallocate(fixed_seed)
+
+  call random_number(pt2_u)
+ END_PROVIDER
+
+ BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
+&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
+  implicit none
+  BEGIN_DOC
+! pt2_J contains the list of generators after ordering them according to the
+! Monte Carlo sampling.
+!
+! pt2_R(j) is the number of combs drawn once entry j of pt2_J is computed (0 where no comb count is attached).
+  END_DOC
+  integer                :: N_c, N_j  ! N_c: combs drawn so far; N_j: entries of pt2_J filled so far
+  integer                :: U, t, i  ! U: cursor of the sequential scan over generators
+  double precision       :: v
+  integer, external      :: pt2_find_sample_lr
+
+  logical, allocatable :: pt2_d(:)  ! pt2_d(i): generator i is already listed in pt2_J
+  integer :: m,l,r,k
+  integer :: ncache
+  integer, allocatable :: ii(:,:)  ! ii(t,k): generator sampled in tooth t by comb k of the current batch
+  double precision :: dt
+
+  ncache = min(N_det_generators,10000)  ! combs are produced in batches of at most 10000
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+
+  allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators))
+
+  pt2_R(:) = 0
+  pt2_d(:) = .false.
+  N_c = 0
+  N_j = pt2_n_0(1)
+  do i=1,N_j  ! the first pt2_n_0(1) generators are listed first, in their natural order
+      pt2_d(i) = .true.
+      pt2_J(i) = i
+  end do
+
+  U = 0
+  do while(N_j < pt2_n_tasks)
+
+    if (N_c+ncache > N_det_generators) then
+      ncache = N_det_generators - N_c  ! keeps pt2_u(N_c+k) inside its bounds
+    endif
+
+    !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k)
+    do k=1, ncache
+      dt = pt2_u_0
+      do t=1, pt2_N_teeth
+        v = dt + pt2_W_T *pt2_u(N_c+k)  ! the same random number is used in every tooth of a comb
+        dt = dt + pt2_W_T
+        ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1))
+      end do
+    enddo
+    !$OMP END PARALLEL DO
+
+    do k=1,ncache
+      !ADD_COMB: append the generators sampled by this comb that are not listed yet
+      N_c = N_c+1
+      do t=1, pt2_N_teeth
+        i = ii(t,k)
+        if(.not. pt2_d(i)) then
+          N_j += 1
+          pt2_J(N_j) = i
+          pt2_d(i) = .true.
+        end if
+      end do
+
+      pt2_R(N_j) = N_c  ! N_c combs are complete once entry N_j has been computed
+
+      !FILL_TOOTH: also append the first generator, in natural order, that is not listed yet
+      do while(U < N_det_generators)
+        U += 1
+        if(.not. pt2_d(U)) then
+          N_j += 1
+          pt2_J(N_j) = U
+          pt2_d(U) = .true.
+          exit
+        end if
+      end do
+      if (N_j >= pt2_n_tasks) exit
+    end do
+  enddo
+
+  if(N_det_generators > 1) then
+    pt2_R(N_det_generators-1) = 0
+    pt2_R(N_det_generators) = N_c  ! the last entry always carries the total number of combs
+  end if
+
+  deallocate(ii,pt2_d)
+
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ double precision, pt2_w, (N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_cW, (0:N_det_generators) ]
+&BEGIN_PROVIDER [ double precision, pt2_W_T ]
+&BEGIN_PROVIDER [ double precision, pt2_u_0 ]
+&BEGIN_PROVIDER [ integer,          pt2_n_0, (pt2_N_teeth+1) ]
+   implicit none  ! builds the weights pt2_w, their cumulative sum pt2_cW, the tooth width pt2_W_T, the offset pt2_u_0 and the tooth bounds pt2_n_0
+   integer                        :: i, t
+   double precision, allocatable  :: tilde_w(:), tilde_cW(:)  ! normalized squared coefficients and their cumulative sum
+   double precision               :: r, tooth_width
+   integer, external              :: pt2_find_sample
+
+   double precision               :: rss
+   double precision, external     :: memory_of_double, memory_of_int
+   rss = memory_of_double(2*N_det_generators+1)
+   call check_mem(rss,irp_here)
+
+   if (N_det_generators == 1) then  ! single generator: fixed values
+
+     pt2_w(1)   = 1.d0
+     pt2_cw(1)  = 1.d0  ! NOTE(review): pt2_cW(0) is not assigned in this branch
+     pt2_u_0    = 1.d0
+     pt2_W_T    = 0.d0
+     pt2_n_0(1) = 0
+     pt2_n_0(2) = 1
+
+   else
+
+     allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+
+     tilde_cW(0) = 0d0  ! overwritten with -1.d0 below before it is read
+
+     do i=1,N_det_generators
+       tilde_w(i)  = psi_coef_sorted_tc_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+     enddo
+
+     double precision               :: norm2
+     norm2 = 0.d0
+     do i=N_det_generators,1,-1  ! summed from the last element to the first
+       norm2 += tilde_w(i)
+     enddo
+
+     tilde_w(:) = tilde_w(:) / norm2
+
+     tilde_cW(0) = -1.d0  ! the cumulative sum is accumulated from -1 and shifted by +1 afterwards
+     do i=1,N_det_generators
+       tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+     enddo
+     tilde_cW(:) = tilde_cW(:) + 1.d0
+
+     pt2_n_0(1) = 0  ! number of leading generators placed before the first tooth
+     do
+     pt2_u_0 = tilde_cW(pt2_n_0(1))
+     r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
+     pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)  ! remaining weight split evenly among the teeth
+     if(pt2_W_T >= r - pt2_u_0) then  ! the next pt2_minDetInFirstTeeth generators fit within one tooth width
+       exit
+     end if
+     pt2_n_0(1) += 1
+     if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then  ! not enough generators left for the teeth
+       print *, "teeth building failed"
+       stop -1
+     end if
+   end do
+
+   do t=2, pt2_N_teeth
+     r = pt2_u_0 + pt2_W_T * dble(t-1)
+     pt2_n_0(t) = pt2_find_sample(r, tilde_cW)  ! bound of tooth t located in the cumulative table
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators
+
+   pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1))  ! generators before the first tooth keep their normalized weight
+   do t=1, pt2_N_teeth
+     tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
+     if (tooth_width == 0.d0) then
+       tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1)))  ! NOTE(review): starts at pt2_n_0(t), one before the first element rescaled below - confirm intended
+     endif
+     ASSERT(tooth_width > 0.d0)
+     do i=pt2_n_0(t)+1, pt2_n_0(t+1)
+       pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width  ! rescaled by pt2_W_T / tooth_width
+     end do
+   end do
+
+   pt2_cW(0) = 0d0
+   do i=1,N_det_generators
+     pt2_cW(i) = pt2_cW(i-1) + pt2_w(i)
+   end do
+   pt2_n_0(pt2_N_teeth+1) = N_det_generators  ! same assignment as above, repeated
+
+ endif
+END_PROVIDER
+
+
+
+
+
--- a/src/cipsi_tc_bi_ortho/pt2_type.irp.f
+++ b/src/cipsi_tc_bi_ortho/pt2_type.irp.f
@ -0,0 +1,128 @@
+subroutine pt2_alloc(pt2_data,N)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Allocates the components of pt2_data (pt2, variance and rpt2 of size N,
+! overlap of size N x N) and sets all of them to zero.
+  END_DOC
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: N
+
+  allocate(pt2_data % pt2(N))
+  allocate(pt2_data % variance(N))
+  allocate(pt2_data % rpt2(N))
+  allocate(pt2_data % overlap(N,N))
+
+  pt2_data % pt2      = 0.d0
+  pt2_data % variance = 0.d0
+  pt2_data % rpt2     = 0.d0
+  pt2_data % overlap  = 0.d0
+
+end subroutine
+
+subroutine pt2_dealloc(pt2_data)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Releases the four array components of pt2_data: pt2, variance, rpt2 and
+! overlap.
+  END_DOC
+  type(pt2_type), intent(inout) :: pt2_data
+
+  deallocate(pt2_data % pt2)
+  deallocate(pt2_data % variance)
+  deallocate(pt2_data % rpt2)
+  deallocate(pt2_data % overlap)
+end subroutine
+
+subroutine pt2_add(p1, w, p2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! p1 <- p1 + w * p2, applied to the pt2, rpt2, variance and overlap components.
+! When w is exactly 1 the multiplication is skipped.
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w /= 1.d0) then
+
+    ! General case: scale p2 by w while accumulating
+    p1 % pt2(:)       = p1 % pt2      + w * p2 % pt2
+    p1 % rpt2(:)      = p1 % rpt2     + w * p2 % rpt2
+    p1 % variance(:)  = p1 % variance + w * p2 % variance
+    p1 % overlap(:,:) = p1 % overlap  + w * p2 % overlap
+
+  else
+
+    ! Unit weight: plain accumulation
+    p1 % pt2(:)       = p1 % pt2      + p2 % pt2
+    p1 % rpt2(:)      = p1 % rpt2     + p2 % rpt2
+    p1 % variance(:)  = p1 % variance + p2 % variance
+    p1 % overlap(:,:) = p1 % overlap  + p2 % overlap
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_add2(p1, w, p2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! p1 <- p1 + w * p2**2 (element-wise square), applied to the pt2, rpt2,
+! variance and overlap components.
+! When w is exactly 1 the multiplication by w is skipped.
+  END_DOC
+  type(pt2_type), intent(inout) :: p1
+  double precision, intent(in)  :: w
+  type(pt2_type), intent(in)    :: p2
+
+  if (w /= 1.d0) then
+
+    ! General case: weighted sum of squares
+    p1 % pt2(:)       = p1 % pt2      + w * p2 % pt2      * p2 % pt2
+    p1 % rpt2(:)      = p1 % rpt2     + w * p2 % rpt2     * p2 % rpt2
+    p1 % variance(:)  = p1 % variance + w * p2 % variance * p2 % variance
+    p1 % overlap(:,:) = p1 % overlap  + w * p2 % overlap  * p2 % overlap
+
+  else
+
+    ! Unit weight: plain sum of squares
+    p1 % pt2(:)       = p1 % pt2      + p2 % pt2      * p2 % pt2
+    p1 % rpt2(:)      = p1 % rpt2     + p2 % rpt2     * p2 % rpt2
+    p1 % variance(:)  = p1 % variance + p2 % variance * p2 % variance
+    p1 % overlap(:,:) = p1 % overlap  + p2 % overlap  * p2 % overlap
+
+  endif
+
+end subroutine
+
+
+subroutine pt2_serialize(pt2_data, n, x)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Packs pt2_data into the flat array x, in this order:
+!   x(1:n)          pt2
+!   x(n+1:2n)       rpt2
+!   x(2n+1:3n)      variance
+!   x(3n+1:3n+n*n)  overlap(1:n,1:n), flattened with reshape
+  END_DOC
+  type(pt2_type), intent(in)    :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(out) :: x(*)
+
+  integer :: off_rpt2, off_var, off_ovlp
+
+  ! Offsets of the successive fields in x
+  off_rpt2 = n
+  off_var  = 2*n
+  off_ovlp = 3*n
+
+  x(1:n)                     = pt2_data % pt2(1:n)
+  x(off_rpt2+1:off_rpt2+n)   = pt2_data % rpt2(1:n)
+  x(off_var+1:off_var+n)     = pt2_data % variance(1:n)
+  x(off_ovlp+1:off_ovlp+n*n) = reshape(pt2_data % overlap(1:n,1:n), (/ n*n /))
+
+end subroutine
+
+subroutine pt2_deserialize(pt2_data, n, x)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Unpacks the flat array x into pt2_data, reading the fields in this order:
+!   x(1:n)          pt2
+!   x(n+1:2n)       rpt2
+!   x(2n+1:3n)      variance
+!   x(3n+1:3n+n*n)  overlap(1:n,1:n), restored with reshape
+  END_DOC
+  type(pt2_type), intent(inout) :: pt2_data
+  integer, intent(in)           :: n
+  double precision, intent(in)  :: x(*)
+
+  integer :: off_rpt2, off_var, off_ovlp
+
+  ! Offsets of the successive fields in x
+  off_rpt2 = n
+  off_var  = 2*n
+  off_ovlp = 3*n
+
+  pt2_data % pt2(1:n)         = x(1:n)
+  pt2_data % rpt2(1:n)        = x(off_rpt2+1:off_rpt2+n)
+  pt2_data % variance(1:n)    = x(off_var+1:off_var+n)
+  pt2_data % overlap(1:n,1:n) = reshape(x(off_ovlp+1:off_ovlp+n*n), (/ n, n /))
+
+end subroutine
--- a/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
+++ b/src/cipsi_tc_bi_ortho/run_pt2_slave.irp.f
@ -0,0 +1,549 @@
+ use omp_lib
+ use selection_types
+ use f77_zmq
+BEGIN_PROVIDER [ integer(omp_lock_kind), global_selection_buffer_lock ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! OpenMP lock taken around accesses to global_selection_buffer; initialized here
+ END_DOC
+ call omp_init_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+BEGIN_PROVIDER [ type(selection_buffer), global_selection_buffer ]
+ use omp_lib
+ implicit none
+ BEGIN_DOC
+ ! Global buffer for the OpenMP selection, deleted and re-created here while holding global_selection_buffer_lock
+ END_DOC
+ call omp_set_lock(global_selection_buffer_lock)
+ call delete_selection_buffer(global_selection_buffer)  ! presumably releases a previously built buffer - helper not shown, TODO confirm
+ call create_selection_buffer(N_det_generators, 2*N_det_generators, &
+    global_selection_buffer)
+ call omp_unset_lock(global_selection_buffer_lock)
+END_PROVIDER
+
+
+subroutine run_pt2_slave(thread,iproc,energy)
+  use selection_types
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Entry point of a PT2 worker. Chooses the implementation from the number of
+  ! determinants: run_pt2_slave_small up to 100000 determinants,
+  ! run_pt2_slave_large above. All arguments are forwarded unchanged.
+  END_DOC
+  double precision, intent(in) :: energy(N_states_diag)
+  integer, intent(in)          :: thread, iproc
+  integer, parameter           :: n_det_small_max = 100000
+
+  if (N_det <= n_det_small_max) then
+    call run_pt2_slave_small(thread,iproc,energy)
+  else
+    call run_pt2_slave_large(thread,iproc,energy)
+  endif
+end subroutine
+
+subroutine run_pt2_slave_small(thread,iproc,energy)  ! PT2 worker that fetches and processes batches of up to pt2_n_tasks_max tasks
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)  ! forwarded unchanged to select_connected
+  integer,  intent(in)            :: thread, iproc  ! thread is passed to the socket/connection helpers; iproc is not used in this routine
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512), allocatable   :: task(:)  ! task strings of the current batch
+  integer, allocatable           :: task_id(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b  ! local selection buffer, created when the first batch arrives
+  logical :: done, buffer_ready
+
+  type(pt2_type), allocatable :: pt2_data(:)  ! one result per task of the batch
+  integer :: n_tasks, k, N
+  integer, allocatable :: i_generator(:), subset(:)
+
+  double precision, external :: memory_of_double, memory_of_int
+  integer :: bsize ! Size of selection buffers
+
+  allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
+  allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then  ! connection failed: close the socket and leave
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0  ! b%N == 0 marks the buffer as not created yet
+  buffer_ready = .False.
+  n_tasks = 1
+
+  done = .False.
+  do while (.not.done)
+
+    n_tasks = max(1,n_tasks)  ! requested batch size is kept within [1, pt2_n_tasks_max]
+    n_tasks = min(pt2_n_tasks_max,n_tasks)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(n_tasks) == 0  ! a zero id in the last slot ends the loop and is dropped from the batch
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    do k=1,n_tasks
+      call sscanf_ddd(task(k), subset(k), i_generator(k), N)  ! each task string yields three integers: subset, generator, N
+    enddo
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    double precision :: time0, time1
+    call wall_time(time0)
+    do k=1,n_tasks
+      call pt2_alloc(pt2_data(k),N_states)
+      b%cur = 0  ! the buffer cursor is reset before every task
+      call select_connected(i_generator(k),energy,pt2_data(k),b,subset(k),pt2_F(i_generator(k)))
+    enddo
+    call wall_time(time1)
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.  ! reporting failed: push this batch, then stop
+    endif
+    call sort_selection_buffer(b)
+    call push_pt2_results(zmq_socket_push, i_generator, pt2_data, b, task_id, n_tasks)
+    do k=1,n_tasks
+      call pt2_dealloc(pt2_data(k))
+    enddo
+    b%cur=0
+
+!    ! Try to adjust n_tasks around nproc/2 seconds per job
+    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/2) / (time1 - time0 + 1.d0)))  ! at most doubles from one batch to the next
+    n_tasks = min(n_tasks, pt2_n_tasks_max)
+!    n_tasks = 1
+  end do
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit  ! retried (up to 300 times) while the return code is -2
+    call usleep(500)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  deallocate(pt2_data)
+end subroutine
+
+
+subroutine run_pt2_slave_large(thread,iproc,energy)  ! PT2 worker that handles one task at a time and merges its selection into global_selection_buffer
+ use selection_types
+ use f77_zmq
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)  ! forwarded unchanged to select_connected
+  integer,  intent(in)            :: thread, iproc  ! iproc == 1 always sends the shared buffer (see below)
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512)                :: task
+  integer                        :: task_id(1)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: b  ! local selection buffer, created when the first task arrives
+  logical :: done, buffer_ready
+
+  type(pt2_type) :: pt2_data
+  integer :: n_tasks, k, N
+  integer :: i_generator, subset
+
+  integer :: bsize ! Size of selection buffers
+  logical :: sending  ! .True. while a send is waiting for its matching receive
+  double precision :: time_shift
+
+  PROVIDE global_selection_buffer global_selection_buffer_lock
+
+  call random_number(time_shift)
+  time_shift = time_shift*15.d0  ! random offset below 15; only used to shift time0
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then  ! connection failed: close the socket and leave
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  b%N = 0  ! b%N == 0 marks the buffer as not created yet
+  buffer_ready = .False.
+  n_tasks = 1
+
+  sending = .False.
+  done = .False.
+  double precision :: time0, time1
+  call wall_time(time0)
+  time0 = time0+time_shift
+  do while (.not.done)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(1) == 0  ! a zero task id ends the loop
+    if (done) then
+      n_tasks = n_tasks-1
+    endif
+    if (n_tasks == 0) exit
+
+    call sscanf_ddd(task, subset, i_generator, N)  ! the task string yields three integers: subset, generator, N
+    if( pt2_F(i_generator) <= 0 .or. pt2_F(i_generator) > N_det ) then  ! sanity check on pt2_F for this generator
+      print *,  irp_here
+      stop 'bug in selection'
+    endif
+    if (b%N == 0) then
+      ! Only first time
+      bsize = min(N, (elec_alpha_num * (mo_num-elec_alpha_num))**2)
+      call create_selection_buffer(bsize, bsize*2, b)
+      buffer_ready = .True.
+    else
+      ASSERT (b%N == bsize)
+    endif
+
+    call pt2_alloc(pt2_data,N_states)
+    b%cur = 0
+    call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator))
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.  ! reporting failed: push this result, then stop
+    endif
+    call sort_selection_buffer(b)
+
+    call wall_time(time1)
+!    if (time1-time0 > 15.d0) then
+      call omp_set_lock(global_selection_buffer_lock)  ! merge the local buffer into the shared one under the lock
+      global_selection_buffer%mini = b%mini
+      call merge_selection_buffers(b,global_selection_buffer)
+      b%cur=0  ! the local buffer is emptied after the merge
+      call omp_unset_lock(global_selection_buffer_lock)
+      call wall_time(time0)
+!    endif
+
+    call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)  ! completes the previous send; returns at once when none is pending
+    if ( iproc == 1 .or. i_generator < 100 .or. done) then  ! these cases send the shared buffer, the others send the emptied local one
+      call omp_set_lock(global_selection_buffer_lock)
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending)
+      global_selection_buffer%cur = 0  ! shared buffer emptied once sent
+      call omp_unset_lock(global_selection_buffer_lock)
+    else
+      call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), b, (/task_id/), 1,sending)
+    endif
+
+    call pt2_dealloc(pt2_data)
+  end do
+  call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending)  ! completes the last send, if any
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit  ! retried (up to 300 times) while the return code is -2
+    call sleep(1)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  if (buffer_ready) then
+    call delete_selection_buffer(b)
+  endif
+  FREE global_selection_buffer
+end subroutine
+
+
+subroutine push_pt2_results(zmq_socket_push, index, pt2_data, b, task_id, n_tasks)
+  use selection_types
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Synchronous push of n_tasks results: issues the asynchronous send and
+  ! immediately the matching asynchronous receive, which stores its result
+  ! in b%mini.
+  END_DOC
+  integer(ZMQ_PTR), intent(in)          :: zmq_socket_push
+  integer, intent(in)                   :: n_tasks
+  integer, intent(in)                   :: index(n_tasks), task_id(n_tasks)
+  type(pt2_type), intent(in)            :: pt2_data(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+
+  logical :: in_flight
+
+  ! Nothing is pending when the send starts
+  in_flight = .False.
+  call push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, in_flight)
+  call push_pt2_results_async_recv(zmq_socket_push, b%mini, in_flight)
+end subroutine
+
+
+subroutine push_pt2_results_async_send(zmq_socket_push, index, pt2_data, b, task_id, n_tasks, sending)
+  use selection_types
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Sends the results of n_tasks tasks as one multipart message made of:
+  !   1. n_tasks                      (4 bytes)
+  !   2. index(1:n_tasks)             (4*n_tasks bytes)
+  !   3. the serialized pt2_data      (8*pt2_type_size(N_states)*n_tasks bytes)
+  !   4. task_id(1:n_tasks)           (4*n_tasks bytes)
+  !   5. b%cur                        (4 bytes)
+  ! and, only when b%cur > 0:
+  !   6. b%val                        (8*b%cur bytes)
+  !   7. b%det                        (bit_kind*N_int*2*b%cur bytes)
+  !
+  ! sending must be .False. on entry and is set to .True.; it is reset by
+  ! push_pt2_results_async_recv. Any transmission error stops the program.
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  type(pt2_type), intent(in)     :: pt2_data(n_tasks)
+  type(selection_buffer), intent(inout) :: b
+  logical, intent(inout) :: sending
+  integer :: rc, i, n_bytes
+  integer*8 :: rc8, n_bytes8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  if (sending) then
+    print *,  irp_here, ': sending is true'
+    stop -1
+  endif
+  sending = .True.
+
+  rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 1
+  else if(rc /= 4) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 2
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  do i=1,n_tasks
+    call pt2_serialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+
+  ! The byte count is saved before the array is released: size() must not be
+  ! applied to a deallocated array.
+  n_bytes = size(pt2_serialized)*8
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, n_bytes, ZMQ_SNDMORE)
+  deallocate(pt2_serialized)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+  else if(rc /= n_bytes) then
+    stop 'push'
+  endif
+
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 6
+  else if(rc /= 4*n_tasks) then
+    stop 'push'
+  endif
+
+
+  if (b%cur == 0) then
+
+    ! Empty buffer: b%cur is the last part of the message
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, 0)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+  else
+
+    rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+    if (rc == -1) then
+      print *,  irp_here, ': error sending result'
+      stop 7
+    else if(rc /= 4) then
+      stop 'push'
+    endif
+
+
+    n_bytes8 = 8_8*int(b%cur,8)
+    rc8 = f77_zmq_send8( zmq_socket_push, b%val, n_bytes8, ZMQ_SNDMORE)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 8
+    else if(rc8 /= n_bytes8) then
+      stop 'push'
+    endif
+
+
+    ! The same expression is used for the length sent and the length checked
+    n_bytes8 = int(bit_kind*N_int*2,8)*int(b%cur,8)
+    rc8 = f77_zmq_send8( zmq_socket_push, b%det, n_bytes8, 0)
+    if (rc8 == -1_8) then
+      print *,  irp_here, ': error sending result'
+      stop 9
+    else if(rc8 /= n_bytes8) then
+      stop 'push'
+    endif
+
+  endif
+
+end subroutine
+
+subroutine push_pt2_results_async_recv(zmq_socket_push,mini,sending)
+  use selection_types
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Completes a send started by push_pt2_results_async_send.
+  !
+  ! Returns immediately when no send is pending (sending is .False.).
+  ! Otherwise, unless the code is built with ZMQ_PUSH, waits for the two-part
+  ! reply: the 2-character acknowledgement 'ok', then an 8-byte value stored
+  ! in mini. sending is reset to .False. on exit.
+  !
+  ! mini is left untouched when nothing is received, hence intent(inout).
+  ! Any reception error stops the program.
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)    :: zmq_socket_push
+  double precision, intent(inout) :: mini
+  logical, intent(inout) :: sending
+  integer :: rc
+
+  if (.not.sending) return
+
+! Activate if zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error receiving result'
+    stop 10
+  else if ((rc /= 2).or.(ok(1:2) /= 'ok')) then
+    ! Either a wrong length or a wrong content makes the acknowledgement invalid
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+  rc = f77_zmq_recv( zmq_socket_push, mini, 8, 0)
+  if (rc == -1) then
+    print *,  irp_here, ': error receiving result'
+    stop 11
+  else if (rc /= 8) then
+    print *,  irp_here//': error in receiving mini'
+    stop 12
+  endif
+IRP_ENDIF
+  sending = .False.
+end subroutine
+
+
+
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2_data, task_id, n_tasks, b)
+  use selection_types
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+  ! Receives one multipart message with the layout written by
+  ! push_pt2_results_async_send:
+  !   n_tasks, index(1:n_tasks), serialized pt2 data, task_id(1:n_tasks),
+  !   b%cur and, when b%cur > 0, b%val and b%det.
+  ! The pt2 data are deserialized into pt2_data(1:n_tasks).
+  !
+  ! When a receive returns -1, n_tasks is set to 1 and task_id(1) to 0 and the
+  ! routine goes on; a part with an unexpected length stops the program.
+  !
+  ! Unless the code is built with ZMQ_PUSH, the reply 'ok' followed by b%mini
+  ! is sent back on the same socket.
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout)  :: pt2_data(*)
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(out) :: index(*)
+  integer, intent(out) :: n_tasks, task_id(*)
+  integer :: rc, i, n_bytes
+  integer*8 :: rc8, n_bytes8
+  double precision, allocatable :: pt2_serialized(:,:)
+
+  rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  allocate(pt2_serialized (pt2_type_size(N_states),n_tasks) )
+  ! size(pt2_serialized) already accounts for the n_tasks columns: the length
+  ! given to the receive call must not exceed the storage of the array.
+  n_bytes = 8*size(pt2_serialized)
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, n_bytes, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= n_bytes) then
+    stop 'pull'
+  endif
+
+  do i=1,n_tasks
+    call pt2_deserialize(pt2_data(i),N_states,pt2_serialized(1,i))
+  enddo
+  deallocate(pt2_serialized)
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4*n_tasks) then
+    stop 'pull'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, b%cur, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if(rc /= 4) then
+    stop 'pull'
+  endif
+
+  if (b%cur > 0) then
+
+    n_bytes8 = 8_8*int(b%cur,8)
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%val, n_bytes8, 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= n_bytes8) then
+      stop 'pull'
+    endif
+
+    ! The same expression is used for the length requested and the length checked
+    n_bytes8 = int(bit_kind*N_int*2,8)*int(b%cur,8)
+    rc8 = f77_zmq_recv8( zmq_socket_pull, b%det, n_bytes8, 0)
+    if (rc8 == -1_8) then
+      n_tasks = 1
+      task_id(1) = 0
+    else if(rc8 /= n_bytes8) then
+      stop 'pull'
+    endif
+
+  endif
+
+! Activate if zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, ZMQ_SNDMORE)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  else if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+  ! The return code of this last part is not checked
+  rc = f77_zmq_send( zmq_socket_pull, b%mini, 8, 0)
+IRP_ENDIF
+
+end subroutine
+
--- a/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
+++ b/src/cipsi_tc_bi_ortho/run_selection_slave.irp.f
@ -0,0 +1,255 @@
+subroutine run_selection_slave(thread, iproc, energy)
+
+  use f77_zmq
+  use selection_types
+
+  implicit none
+
+  BEGIN_DOC
+  ! Worker loop of the selection step.
+  !
+  ! Connects to the task server, repeatedly fetches a task, runs
+  ! select_connected on the generator described by the task, reports the
+  ! task as done, and pushes the sorted selection buffer together with the
+  ! accumulated PT2 data through the push socket.
+  !
+  ! thread : passed to connect_to_taskserver, new_zmq_push_socket and
+  !          end_zmq_push_socket
+  ! iproc  : not referenced in the body
+  ! energy : forwarded to select_connected
+  !
+  ! A task id equal to 0 terminates the loop.
+  END_DOC
+
+  double precision, intent(in) :: energy(N_states)
+  integer,          intent(in) :: thread, iproc
+
+  integer                      :: i
+  integer                      :: worker_id, task_id(1), ctask
+  character*(512)              :: task
+  integer(ZMQ_PTR)             :: zmq_to_qp_run_socket
+  integer(ZMQ_PTR)             :: zmq_socket_push
+  integer(ZMQ_PTR), external   :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR), external   :: new_zmq_push_socket
+  type(selection_buffer)       :: buf, buf2
+  type(pt2_type)               :: pt2_data
+  logical                      :: done, buffer_ready
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F pseudo_sym
+  PROVIDE psi_selectors_coef_transp_tc psi_det_sorted_tc weight_selection
+
+  call pt2_alloc(pt2_data,N_states)
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  ! The buffer is only allocated when the first task arrives. Until then
+  ! its counters must be well defined, because the final push below can
+  ! be reached without any task having been processed (failure of the
+  ! very first get_task_from_taskserver call).
+  buf%N = 0
+  buf%cur = 0
+  task_id(:) = 0
+  buffer_ready = .False.
+  ctask = 1
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then
+      exit
+    endif
+    done = task_id(ctask) == 0
+    if (done) then
+      ! The termination marker is not a task to be reported
+      ctask = ctask - 1
+    else
+      integer :: i_generator, N, subset
+      call sscanf_ddd(task, subset, i_generator, N)
+      if(buf%N == 0) then
+        ! Only first time
+        call create_selection_buffer(N, N*2, buf)
+        buffer_ready = .True.
+      else
+        ! All tasks of a run are expected to request the same buffer size
+        if (N /= buf%N) then
+          print *, 'N=', N
+          print *, 'buf%N=', buf%N
+          print *, 'bug in ', irp_here
+          stop '-1'
+        end if
+      end if
+      call select_connected(i_generator, energy, pt2_data, buf,subset, pt2_F(i_generator))
+    endif
+
+    integer, external :: task_done_to_taskserver
+
+    if(done .or. ctask == size(task_id)) then
+      do i=1, ctask
+         ! Report the task as done; retry once after a short pause before
+         ! giving up on the whole batch.
+         if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+           call usleep(100)
+          if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+            ctask = 0
+            done = .true.
+            exit
+          endif
+         endif
+      end do
+      if(ctask > 0) then
+        call sort_selection_buffer(buf)
+!        call merge_selection_buffers(buf,buf2)
+        call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+        ! Start the next batch from empty accumulators
+        call pt2_dealloc(pt2_data)
+        call pt2_alloc(pt2_data,N_states)
+!        buf%mini = buf2%mini
+        buf%cur = 0
+      end if
+      ctask = 0
+    end if
+
+    if(done) exit
+    ctask = ctask + 1
+  end do
+
+  ! Reached with ctask > 0 only when the loop was left through the
+  ! get_task_from_taskserver failure branch.
+  if(ctask > 0) then
+    call sort_selection_buffer(buf)
+!    call merge_selection_buffers(buf,buf2)
+    call push_selection_results(zmq_socket_push, pt2_data, buf, task_id(1), ctask)
+!    buf%mini = buf2%mini
+    buf%cur = 0
+  end if
+  ctask = 0
+  call pt2_dealloc(pt2_data)
+
+  integer, external :: disconnect_from_taskserver
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then
+    continue
+  endif
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  if (buffer_ready) then
+    call delete_selection_buffer(buf)
+!    call delete_selection_buffer(buf2)
+  endif
+end subroutine
+
+
+subroutine push_selection_results(zmq_socket_push, pt2_data, b, task_id, ntasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Sends the content of a selection buffer and the associated PT2 data
+! through zmq_socket_push as one multi-part message.
+!
+! Frames, in order:
+!   1. b%cur                        (4 bytes)
+!   2. serialized pt2_data          (8*pt2_type_size(N_states) bytes)
+!   3. b%val(1:b%cur)               (only if b%cur > 0)
+!   4. b%det(:,:,1:b%cur)           (only if b%cur > 0)
+!   5. ntasks                       (4 bytes)
+!   6. task_id(1:ntasks)            (last frame)
+!
+! When ZMQ_PUSH is not defined, a 2-character 'ok' reply is then read
+! from the same socket.
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  type(pt2_type), intent(in)     :: pt2_data
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in) :: ntasks, task_id(*)
+  integer :: rc
+  double precision, allocatable :: pt2_serialized(:)
+
+  rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)'
+  endif
+
+
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  call pt2_serialize(pt2_data,N_states,pt2_serialized)
+
+  rc = f77_zmq_send( zmq_socket_push, pt2_serialized, size(pt2_serialized)*8, ZMQ_SNDMORE)
+  if (rc == -1) then
+    print *,  irp_here, ': error sending result'
+    stop 3
+  else if(rc /= size(pt2_serialized)*8) then
+    stop 'push'
+  endif
+  deallocate(pt2_serialized)
+
+  if (b%cur > 0) then
+
+      rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)
+      if(rc /= 8*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)
+      if(rc /= bit_kind*N_int*2*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)'
+      endif
+
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, ntasks, 4, ZMQ_SNDMORE)'
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate if zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  ! The acknowledgement is invalid if EITHER the length or the content
+  ! is wrong.
+  if ((rc /= 2).or.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine pull_selection_results(zmq_socket_pull, pt2_data, val, det, N, task_id, ntasks)
+  ! Receives, frame by frame from zmq_socket_pull, one message with the
+  ! layout produced by push_selection_results:
+  !   N, serialized pt2 data, [val, det if N > 0], ntasks, task_id.
+  !
+  ! zmq_socket_pull : socket to read from
+  ! pt2_data        : filled by pt2_deserialize from the second frame
+  ! val, det        : receive N values and N determinants when N > 0
+  ! N               : number of elements announced by the sender
+  ! task_id, ntasks : identifiers of the tasks covered by this message
+  use f77_zmq
+  use selection_types
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(pt2_type), intent(inout) :: pt2_data
+  double precision, intent(out) :: val(*)
+  integer(bit_kind), intent(out) :: det(N_int, 2, *)
+  integer, intent(out) :: N, ntasks, task_id(*)
+  integer :: rc, rn, i
+  double precision, allocatable :: pt2_serialized(:)
+
+  ! Frame 1: number of elements. A size mismatch is only reported.
+  rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, N, 4, 0)'
+  endif
+
+  ! Frame 2: serialized PT2 data. On a receive failure (rc == -1) ntasks
+  ! and task_id(1) are set to 1 and 0, and execution continues; any other
+  ! size mismatch stops the program.
+  allocate(pt2_serialized (pt2_type_size(N_states)) )
+  rc = f77_zmq_recv( zmq_socket_pull, pt2_serialized, 8*size(pt2_serialized), 0)
+  if (rc == -1) then
+    ntasks = 1
+    task_id(1) = 0
+  else if(rc /= 8*size(pt2_serialized)) then
+    stop 'pull'
+  endif
+
+  call pt2_deserialize(pt2_data,N_states,pt2_serialized)
+  deallocate(pt2_serialized)
+
+  ! Frames 3 and 4 are only present when the sender had a non-empty buffer
+  if (N>0) then
+      rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)
+      if(rc /= 8*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)
+      if(rc /= bit_kind*N_int*2*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)'
+      endif
+  endif
+
+  ! Frame 5: number of task ids. This overwrites any value of ntasks set
+  ! in the failure branch above.
+  rc = f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, ntasks, 4, 0)'
+  endif
+
+  ! Frame 6: the task ids themselves
+  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)
+  if(rc /= 4*ntasks) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntasks*4, 0)'
+  endif
+
+! Activate if zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  ! Acknowledge the message so that the sender can proceed
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+IRP_ENDIF
+end subroutine
+
+
+
--- a/src/cipsi_tc_bi_ortho/selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection.irp.f
--- a/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection_buffer.irp.f
@ -0,0 +1,416 @@
+
+subroutine create_selection_buffer(N, size_in, res)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Allocates and zero-initializes a selection buffer.
+!
+! N       : maximum number of elements kept in the buffer (stored in res%N)
+! size_in : requested length of the storage arrays; at least one slot is
+!           always allocated
+! res     : the new, empty buffer (cur = 0, mini = 0)
+  END_DOC
+
+  integer, intent(in)                 :: N, size_in
+  type(selection_buffer), intent(out) :: res
+
+  integer                    :: n_alloc
+  double precision           :: mem_needed
+  double precision, external :: memory_of_double
+
+  n_alloc = max(size_in,1)
+
+  ! One double per value plus 2*N_int words per determinant
+  mem_needed = memory_of_double(n_alloc)*(N_int*2+1)
+  call check_mem(mem_needed,irp_here)
+
+  allocate(res%det(N_int, 2, n_alloc), res%val(n_alloc))
+
+  res%N    = N
+  res%cur  = 0
+  res%mini = 0d0
+  res%det(:,:,:) = 0_8
+  res%val(:)     = 0d0
+end subroutine
+
+subroutine delete_selection_buffer(b)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Releases the storage of a selection buffer and resets its counters.
+  END_DOC
+  type(selection_buffer), intent(inout) :: b
+
+  if (associated(b%det)) deallocate(b%det)
+  if (associated(b%val)) deallocate(b%val)
+
+  ! Leave both pointers in a defined, disassociated state
+  NULLIFY(b%det)
+  NULLIFY(b%val)
+
+  b%N    = 0
+  b%cur  = 0
+  b%mini = 0.d0
+end
+
+
+subroutine add_to_selection_buffer(b, det, val)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Appends the determinant det with value val to the buffer b, provided
+! the buffer is active (b%N > 0) and val does not exceed the current
+! threshold b%mini. When the storage becomes full the buffer is sorted,
+! which also truncates it.
+  END_DOC
+
+  type(selection_buffer), intent(inout) :: b
+  integer(bit_kind), intent(in)         :: det(N_int, 2)
+  double precision, intent(in)          :: val
+  logical                               :: accepted
+
+  accepted = (b%N > 0) .and. (val <= b%mini)
+  if (.not.accepted) return
+
+  b%cur = b%cur + 1
+  b%val(b%cur) = val
+  b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
+
+  if (b%cur == size(b%val)) then
+    call sort_selection_buffer(b)
+  end if
+end subroutine
+
+subroutine merge_selection_buffers(b1, b2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Merges the selection buffers b1 and b2 into b2
+!
+! b1 : source buffer. Its counter b1%cur is decreased in place so that
+!      trailing elements with a value above b2%mini are ignored.
+! b2 : destination buffer. Its det/val arrays are replaced by newly
+!      allocated ones holding the merged content.
+!
+! NOTE(review): the merge takes the smaller head value first, so both
+! buffers are presumably expected to be sorted in increasing order of
+! val -- confirm against callers.
+  END_DOC
+  type(selection_buffer), intent(inout) :: b1
+  type(selection_buffer), intent(inout) :: b2
+  integer(bit_kind), pointer     :: detmp(:,:,:)
+  double precision, pointer      :: val(:)
+  integer                        :: i, i1, i2, k, nmwen, sze
+  ! Nothing to merge
+  if (b1%cur == 0) return
+  ! Drop from the end of b1 every element whose value exceeds b2%mini
+  do while (b1%val(b1%cur) > b2%mini)
+    b1%cur = b1%cur-1
+    if (b1%cur == 0) then
+      return
+    endif
+  enddo
+  ! Number of elements kept after merging
+  nmwen = min(b1%N, b1%cur+b2%cur)
+  double precision :: rss
+  double precision, external :: memory_of_double
+  sze = max(size(b1%val), size(b2%val))
+  rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
+  call check_mem(rss,irp_here)
+  allocate(val(sze), detmp(N_int, 2, sze))
+  ! i1 and i2 are the read positions in b1 and b2
+  i1=1
+  i2=1
+  do i=1,nmwen
+    if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
+      exit
+    else if (i1 > b1%cur) then
+        ! b1 exhausted: take from b2
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+    else if (i2 > b2%cur) then
+        ! b2 exhausted: take from b1
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+    else
+      ! Both available: take the smaller value, b1 first on ties
+      if (b1%val(i1) <= b2%val(i2)) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+      else
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+      endif
+    endif
+  enddo
+  ! The old storage of b2 is released before the new arrays are attached
+  deallocate(b2%det, b2%val)
+  ! Zero the slots between the merged content and b2%N
+  do i=nmwen+1,b2%N
+    val(i) = 0.d0
+    detmp(1:N_int,1:2,i) = 0_bit_kind
+  enddo
+  b2%det => detmp
+  b2%val => val
+  b2%mini = min(b2%mini,b2%val(b2%N))
+  b2%cur = nmwen
+end
+
+
+subroutine sort_selection_buffer(b)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Sorts the first b%cur elements of the buffer with dsort on b%val,
+! applies the resulting permutation to the determinants, keeps at most
+! b%N of them and updates the threshold b%mini.
+!
+! Does nothing when the buffer is inactive (b%N == 0) or empty.
+  END_DOC
+
+  type(selection_buffer), intent(inout) :: b
+
+  integer, allocatable       :: perm(:)
+  integer(bit_kind), pointer :: sorted_det(:,:,:)
+  integer                    :: idx, n_keep, n_slots
+  double precision           :: mem_needed
+  double precision, external :: memory_of_double, memory_of_int
+
+  if ( (b%N == 0) .or. (b%cur == 0) ) return
+
+  n_keep  = min(b%N, b%cur)
+  n_slots = size(b%det,3)
+
+  mem_needed = memory_of_int(b%cur) + 2*N_int*memory_of_double(n_slots)
+  call check_mem(mem_needed,irp_here)
+
+  allocate(perm(b%cur), sorted_det(N_int, 2, n_slots))
+
+  ! Identity permutation, reordered by dsort together with the values
+  do idx=1,b%cur
+    perm(idx) = idx
+  end do
+  call dsort(b%val, perm, b%cur)
+
+  ! Only the retained determinants are copied to the new storage
+  do idx=1,n_keep
+    sorted_det(1:N_int,1:2,idx) = b%det(1:N_int,1:2,perm(idx))
+  end do
+
+  deallocate(b%det)
+  deallocate(perm)
+
+  b%det  => sorted_det
+  b%mini = min(b%mini,b%val(b%N))
+  b%cur  = n_keep
+end subroutine
+
+subroutine make_selection_buffer_s2(b)
+  ! Rebuilds the buffer b in four steps:
+  !   1. every determinant is replaced by the pair (ieor, iand) of its two
+  !      bit strings;
+  !   2. identical pairs are merged, keeping the largest val;
+  !   3. the surviving pairs are sorted with dsort on val;
+  !   4. b%det / b%val are reallocated and filled with the determinants
+  !      returned by configuration_to_dets for each pair.
+  ! On exit b%cur and b%N are both set to the new number of determinants.
+  !
+  ! NOTE(review): judging by the helper names, the (ieor, iand) pair is
+  ! presumably the "configuration" (singly / doubly occupied orbitals) of
+  ! the determinant -- confirm against configuration_to_dets.
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: configuration_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  ! Step 1: o(:,1,i) holds the bits set in exactly one of the two strings,
+  ! o(:,2,i) the bits set in both. bit_tmp(i) is the search key of o(:,:,i).
+  do i=1,n_d
+    do k=1,N_int
+      o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i))
+      o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i))
+    enddo
+    iorder(i) = i
+    bit_tmp(i) = configuration_search_key(o(1,1,i),N_int)
+  enddo
+
+  ! The original determinants are no longer needed
+  deallocate(b%det)
+
+  ! Order by search key so that equal pairs become neighbours
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  ! Step 2: within a run of equal keys, compare the full bit patterns.
+  ! The first occurrence is kept and receives the largest val of the group.
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        val(i) = max(val(i), val(j))
+        duplicate(j) = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  deallocate (b%val)
+  ! Copy filtered result
+  ! The n_p unique pairs are compacted at the beginning of o and val
+  integer :: n_p
+  n_p=0
+  do i=1,n_d
+    if (duplicate(i)) then
+      cycle
+    endif
+    n_p = n_p + 1
+    do k=1,N_int
+      o(k,1,n_p) = tmp_array(k,1,i)
+      o(k,2,n_p) = tmp_array(k,2,i)
+    enddo
+    val(n_p) = val(i)
+  enddo
+
+  ! Sort by importance
+  ! Step 3: dsort reorders val and iorder; o is permuted accordingly
+  ! through tmp_array.
+  do i=1,n_p
+    iorder(i) = i
+  end do
+  call dsort(val,iorder,n_p)
+  do i=1,n_p
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+  enddo
+  do i=1,n_p
+    do k=1,N_int
+      o(k,1,i) = tmp_array(k,1,i)
+      o(k,2,i) = tmp_array(k,2,i)
+    enddo
+  enddo
+
+  ! Create determinants
+  ! Step 4a: accumulate the sizes returned by configuration_to_dets_size
+  ! and stop at the first pair that makes the total exceed b%cur (that
+  ! pair is still counted).
+  n_d = 0
+  do i=1,n_p
+    call configuration_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int)
+    n_d = n_d + sze
+    if (n_d > b%cur) then
+!      if (n_d - b%cur > b%cur - n_d + sze) then
+!        n_d = n_d - sze
+!      endif
+      exit
+    endif
+  enddo
+
+  rss = (4*N_int+2)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  ! The new storage is allocated with twice the number of determinants
+  allocate(b%det(N_int,2,2*n_d), b%val(2*n_d))
+  ! Step 4b: k is the next free slot in b%det; every determinant generated
+  ! from pair i receives the value val(i).
+  k=1
+  do i=1,n_p
+    n=n_d
+    call configuration_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int)
+    call configuration_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int)
+    do j=k,k+n-1
+      b%val(j) = val(i)
+    enddo
+    k = k+n
+    if (k > n_d) exit
+  enddo
+  deallocate(o)
+  b%cur = n_d
+  b%N = n_d
+end
+
+
+
+
+subroutine remove_duplicates_in_selection_buffer(b)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Removes duplicated determinants from the selection buffer b.
+!
+! The determinants are ordered by their search key (det_search_key),
+! and determinants with equal keys are compared bit by bit. If at least
+! one duplicate is found, the unique determinants AND their values are
+! written back to the beginning of b%det / b%val in key order, and both
+! b%cur and b%N are set to the number of unique determinants.
+! If no duplicate is found the buffer is left untouched.
+  END_DOC
+  type(selection_buffer), intent(inout) :: b
+
+  double precision, allocatable  :: val(:)
+
+  integer                        :: n_d, n_p
+  integer                        :: i,j,k
+  logical                        :: dup, found_duplicates
+
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: det_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  double precision               :: rss
+  double precision, external     :: memory_of_double
+
+  n_d = b%cur
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+
+  found_duplicates = .False.
+  allocate(iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    iorder(i) = i
+    bit_tmp(i) = det_search_key(b%det(1,1,i),N_int)
+  enddo
+
+  ! Order by search key so that equal determinants become neighbours
+  call i8sort(bit_tmp,iorder,n_d)
+
+  ! Work on sorted copies of the determinants and of their values
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = b%det(k,1,iorder(i))
+      tmp_array(k,2,i) = b%det(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        duplicate(j) = .True.
+        found_duplicates = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+
+  if (found_duplicates) then
+
+    ! Copy filtered result
+    n_p=0
+    do i=1,n_d
+      if (duplicate(i)) then
+        cycle
+      endif
+      n_p = n_p + 1
+      do k=1,N_int
+        b%det(k,1,n_p) = tmp_array(k,1,i)
+        b%det(k,2,n_p) = tmp_array(k,2,i)
+      enddo
+      ! The value must follow its determinant: b%det has been re-ordered,
+      ! so b%val has to be rewritten from the sorted copy as well.
+      b%val(n_p) = val(i)
+    enddo
+    b%cur=n_p
+    b%N=n_p
+
+  endif
+
+end
+
+
+
--- a/src/cipsi_tc_bi_ortho/selection_types.f90
+++ b/src/cipsi_tc_bi_ortho/selection_types.f90
@ -0,0 +1,25 @@
+module selection_types
+
+  ! Buffer of candidate determinants.
+  !   N    : maximum number of elements kept
+  !   cur  : number of elements currently stored
+  !   det  : determinants, indexed (word, spin, element)
+  !   val  : value attached to each element
+  !   mini : current acceptance threshold on val
+  type selection_buffer
+    integer :: N, cur
+    integer(8)      , pointer :: det(:,:,:)
+    double precision, pointer :: val(:)
+    double precision          :: mini
+  end type selection_buffer
+
+  ! Per-state perturbative quantities; overlap is a square matrix.
+  type pt2_type
+    double precision, allocatable :: pt2(:)
+    double precision, allocatable :: rpt2(:)
+    double precision, allocatable :: variance(:)
+    double precision, allocatable :: overlap(:,:)
+  end type pt2_type
+
+  contains
+
+  ! Number of double precision words needed to serialize a pt2_type
+  ! for N states: three vectors of length N and one N x N matrix.
+  function pt2_type_size(N) result(n_words)
+    implicit none
+    integer, intent(in) :: N
+    integer             :: n_words
+    n_words = N*(N + 3)
+  end function pt2_type_size
+
+end module selection_types
+
--- a/src/cipsi_tc_bi_ortho/selection_weight.irp.f
+++ b/src/cipsi_tc_bi_ortho/selection_weight.irp.f
@ -0,0 +1,134 @@
+BEGIN_PROVIDER [ double precision, pt2_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Per-state weights, initialized to one, that are adjusted during the
+ ! selection so that the PT2 contributions of all states coincide.
+ END_DOC
+ integer :: istate
+ do istate = 1, N_states
+   pt2_match_weight(istate) = 1.d0
+ enddo
+END_PROVIDER
+
+
+
+BEGIN_PROVIDER [ double precision, variance_match_weight, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Per-state weights, initialized to one, that are adjusted during the
+ ! selection so that the variances of all states coincide.
+ END_DOC
+ integer :: istate
+ do istate = 1, N_states
+   variance_match_weight(istate) = 1.d0
+ enddo
+END_PROVIDER
+
+
+
+subroutine update_pt2_and_variance_weights(pt2_data, N_st)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Updates the PT2- and Variance- matching weights.
+!
+! pt2_data : PT2 and variance of each state
+! N_st     : number of states to consider
+!
+! Each weight is multiplied by exp(dt*(x_k/avg - 1)), clamped to the
+! interval [0.5, 2], where x_k is the PT2 (resp. variance) of state k and
+! avg is the average over the states. For wave functions with fewer than
+! 100 determinants all weights are reset to 1.
+! threshold_davidson_pt2 is updated from the smallest |PT2|.
+  END_DOC
+  integer, intent(in)          :: N_st
+  type(pt2_type), intent(in)   :: pt2_data
+  double precision             :: pt2(N_st)
+  double precision             :: variance(N_st)
+
+  double precision :: avg, element, dt
+  integer          :: k
+  pt2(1:N_st)      = pt2_data % pt2(1:N_st)
+  variance(1:N_st) = pt2_data % variance(1:N_st)
+
+  avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  dt = 8.d0 !* selection_factor
+  do k=1,N_st
+    element = exp(dt*(pt2(k)/avg - 1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    pt2_match_weight(k) *= element
+  enddo
+
+
+  avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero
+
+  do k=1,N_st
+    element = exp(dt*(variance(k)/avg -1.d0))
+    element = min(2.0d0 , element)
+    element = max(0.5d0 , element)
+    variance_match_weight(k) *= element
+  enddo
+
+  if (N_det < 100) then
+    ! For tiny wave functions, weights are 1.d0
+    pt2_match_weight(:) = 1.d0
+    variance_match_weight(:) = 1.d0
+  endif
+
+  ! pt2 is dimensioned N_st, so the reduction must not run up to N_states.
+  ! The factor is written in double precision to avoid mixing in a
+  ! single-precision 0.1.
+  threshold_davidson_pt2 = min(1.d-6, &
+     max(threshold_davidson, 1.d-1 * PT2_relative_error * minval(abs(pt2(1:N_st)))) )
+
+  SOFT_TOUCH pt2_match_weight variance_match_weight threshold_davidson_pt2
+end
+
+
+
+
+BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Weights used in the selection criterion
+   !
+   ! The formula is chosen by weight_selection:
+   !
+   ! 0    : c0_weight * state_average_weight
+   ! 1, 6 : c0_weight
+   ! 2    : c0_weight * pt2_match_weight
+   ! 3, 5 : c0_weight * variance_match_weight
+   ! 4    : c0_weight * sqrt(variance_match_weight * pt2_match_weight)
+   ! 7    : as 4, multiplied by state_average_weight
+   ! 8    : as 2, multiplied by state_average_weight
+   ! 9    : as 3, multiplied by state_average_weight
+   !
+   ! Any other value stops the program, since the weights would otherwise
+   ! be left undefined.
+   END_DOC
+   select case (weight_selection)
+
+     case (0)
+      print *,  'Using input weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * state_average_weight(1:N_states)
+
+     case (1)
+      print *,  'Using 1/c_max^2 weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (2)
+      print *,  'Using pt2-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (3)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (4)
+      print *,  'Using variance- and pt2-matching weights in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states))
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (5)
+      print *,  'Using variance-matching weight in selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (6)
+      print *,  'Using CI coefficient-based selection'
+      selection_weight(1:N_states) = c0_weight(1:N_states)
+
+     case (7)
+      print *,  'Input weights multiplied by variance- and pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case (8)
+      print *,  'Input weights multiplied by pt2-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
+
+     case (9)
+      print *,  'Input weights multiplied by variance-matching'
+      selection_weight(1:N_states) = c0_weight(1:N_states) * variance_match_weight(1:N_states) * state_average_weight(1:N_states)
+      print *, '# var weight ', real(variance_match_weight(:),4)
+
+     case default
+      ! Without this branch selection_weight would be used uninitialized
+      print *,  irp_here//': unsupported value of weight_selection: ', weight_selection
+      stop -1
+
+    end select
+     print *, '# Total weight ', real(selection_weight(:),4)
+
+END_PROVIDER
+
--- a/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
+++ b/src/cipsi_tc_bi_ortho/slave_cipsi.irp.f
@ -0,0 +1,350 @@
+subroutine run_slave_cipsi
+
+  BEGIN_DOC
+  ! Helper program for distributed parallelism
+  !
+  ! Entry point of a slave process: disables nested OpenMP parallelism,
+  ! switches off distributed_davidson and read_wf, provides the required
+  ! quantities, and hands control to run_slave_main.
+  END_DOC
+
+  implicit none
+
+  ! Only one level of active OpenMP parallel regions
+  call omp_set_max_active_levels(1)
+  distributed_davidson = .False.
+  read_wf = .False.
+  ! Mark both flags as modified by hand for IRPF90
+  SOFT_TOUCH read_wf distributed_davidson
+  call provide_everything
+  call switch_qp_run_to_master
+  call run_slave_main
+end
+
+subroutine provide_everything
+  ! Forces the evaluation of the IRPF90 entities listed below. The body
+  ! contains only PROVIDE directives and performs no other computation.
+  PROVIDE H_apply_buffer_allocated mo_two_e_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context N_states_diag
+  PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+  PROVIDE pt2_e0_denominator mo_num N_int ci_energy mpi_master zmq_state zmq_context
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight
+  PROVIDE N_det_selectors pt2_stoch_istate N_det selection_weight pseudo_sym
+end
+
+
+subroutine run_slave_main
+
+  use f77_zmq
+
+  implicit none
+  IRP_IF MPI
+    include 'mpif.h'
+  IRP_ENDIF
+
+  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket
+  double precision :: energy(N_states)
+  character*(64) :: states(10)
+  character*(64) :: old_state
+  integer :: rc, i, ierr
+  double precision :: t0, t1
+
+  integer, external              :: zmq_get_dvector, zmq_get_N_det_generators
+  integer, external              :: zmq_get8_dvector
+  integer, external              :: zmq_get_ivector
+  integer, external              :: zmq_get_psi, zmq_get_N_det_selectors, zmq_get_psi_bilinear
+  integer, external              :: zmq_get_psi_notouch
+  integer, external              :: zmq_get_N_states_diag
+
+  zmq_context = f77_zmq_ctx_new ()
+  states(1) = 'selection'
+  states(2) = 'davidson'
+  states(3) = 'pt2'
+  old_state = 'Waiting'
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  PROVIDE psi_det psi_coef threshold_generators state_average_weight mpi_master
+  PROVIDE zmq_state N_det_selectors pt2_stoch_istate N_det pt2_e0_denominator
+  PROVIDE N_det_generators N_states N_states_diag pt2_e0_denominator mpi_rank
+
+  IRP_IF MPI
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  do
+
+    if (mpi_master) then
+      call wait_for_states(states,zmq_state,size(states))
+      if (zmq_state(1:64) == old_state(1:64)) then
+        call usleep(200)
+        cycle
+      else
+        old_state(1:64) = zmq_state(1:64)
+      endif
+      print *,  trim(zmq_state)
+    endif
+
+    IRP_IF MPI_DEBUG
+      print *,  irp_here, mpi_rank
+      call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+    IRP_ENDIF
+    IRP_IF MPI
+      call MPI_BCAST (zmq_state, 128, MPI_CHARACTER, 0, MPI_COMM_WORLD, ierr)
+      if (ierr /= MPI_SUCCESS) then
+        print *,  irp_here, 'error in broadcast of zmq_state'
+      endif
+    IRP_ENDIF
+
+    if(zmq_state(1:7) == 'Stopped') then
+      exit
+    endif
+
+
+    if (zmq_state(1:9) == 'selection') then
+
+      ! Selection
+      ! ---------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      TOUCH pt2_e0_denominator state_average_weight threshold_generators selection_weight psi_det psi_coef
+
+      if (mpi_master) then
+        print *,  'N_det', N_det
+        print *,  'N_det_generators', N_det_generators
+        print *,  'N_det_selectors', N_det_selectors
+        print *,  'pt2_e0_denominator', pt2_e0_denominator
+        print *,  'pt2_stoch_istate', pt2_stoch_istate
+        print *,  'state_average_weight', state_average_weight
+        print *,  'selection_weight', selection_weight
+      endif
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call run_selection_slave(0,i,energy)
+      !$OMP END PARALLEL
+      print *,  mpi_rank, ': Selection done'
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:8) == 'davidson') then
+
+      ! Davidson
+      ! --------
+
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_states_diag')
+      IRP_ENDIF
+      if (zmq_get_N_states_diag(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+
+      !---
+      call omp_set_max_active_levels(8)
+      call davidson_slave_tcp(0)
+      call omp_set_max_active_levels(1)
+      print *,  mpi_rank, ': Davidson done'
+      !---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    else if (zmq_state(1:3) == 'pt2') then
+
+      ! PT2
+      ! ---
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call wall_time(t0)
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_psi')
+      IRP_ENDIF
+      if (zmq_get_psi(zmq_to_qp_run_socket,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_generators')
+      IRP_ENDIF
+      if (zmq_get_N_det_generators (zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_N_det_selectors')
+      IRP_ENDIF
+      if (zmq_get_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector threshold_generators')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector energy')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'energy',energy,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_ivector pt2_stoch_istate')
+      IRP_ENDIF
+      if (zmq_get_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector state_average_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) cycle
+      IRP_IF MPI_DEBUG
+        call mpi_print('zmq_get_dvector selection_weight')
+      IRP_ENDIF
+      if (zmq_get_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) cycle
+      pt2_e0_denominator(1:N_states) = energy(1:N_states)
+      SOFT_TOUCH pt2_e0_denominator state_average_weight pt2_stoch_istate threshold_generators selection_weight psi_det psi_coef N_det_generators N_det_selectors
+
+
+      call wall_time(t1)
+      call write_double(6,(t1-t0),'Broadcast time')
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+
+
+      IRP_IF MPI_DEBUG
+        call mpi_print('Entering OpenMP section')
+      IRP_ENDIF
+      if (.true.) then
+        integer :: nproc_target, ii
+        double precision :: mem_collector, mem, rss
+
+        call resident_memory(rss)
+
+        nproc_target = nthreads_pt2
+        ii = min(N_det, (elec_alpha_num*(mo_num-elec_alpha_num))**2)
+
+        do
+          mem = rss +                             & !
+                nproc_target * 8.d0 *             & ! bytes
+                ( 0.5d0*pt2_n_tasks_max           & ! task_id
+                + 64.d0*pt2_n_tasks_max           & ! task
+                + 3.d0*pt2_n_tasks_max*N_states   & ! pt2, variance, norm
+                + 1.d0*pt2_n_tasks_max            & ! i_generator, subset
+                + 3.d0*(N_int*2.d0*ii+ ii)        & ! selection buffer
+                + 1.d0*(N_int*2.d0*ii+ ii)        & ! sort selection buffer
+                + 2.0d0*(ii)                      & ! preinteresting, interesting,
+                                                    ! prefullinteresting, fullinteresting
+                + 2.0d0*(N_int*2*ii)              & ! minilist, fullminilist
+                + 1.0d0*(N_states*mo_num*mo_num)  & ! mat
+                ) / 1024.d0**3
+
+          if (nproc_target == 0) then
+            call check_mem(mem,irp_here)
+            nproc_target = 1
+            exit
+          endif
+
+          if (mem+rss < qp_max_mem) then
+            exit
+          endif
+
+          nproc_target = nproc_target - 1
+
+        enddo
+        
+        if (N_det > 100000) then
+
+          if (mpi_master) then
+            print *,  'N_det', N_det
+            print *,  'N_det_generators', N_det_generators
+            print *,  'N_det_selectors', N_det_selectors
+            print *,  'pt2_e0_denominator', pt2_e0_denominator
+            print *,  'pt2_stoch_istate', pt2_stoch_istate
+            print *,  'state_average_weight', state_average_weight
+            print *,  'selection_weight', selection_weight
+            print *,  'Number of threads', nproc_target
+          endif
+
+          if (h0_type == 'CFG') then
+            PROVIDE det_to_configuration
+          endif
+
+          PROVIDE global_selection_buffer pt2_N_teeth pt2_F N_det_generators
+          PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+          PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+          PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+          PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted_tc
+          PROVIDE psi_selectors_rcoef_bi_orth_transp psi_selectors_lcoef_bi_orth_transp
+
+          PROVIDE psi_det_hii selection_weight pseudo_sym pt2_min_parallel_tasks
+
+          if (mpi_master) then
+            print *,  'Running PT2'
+          endif
+          !$OMP PARALLEL PRIVATE(i) NUM_THREADS(nproc_target+1)
+          i = omp_get_thread_num()
+          call run_pt2_slave(0,i,pt2_e0_denominator)
+          !$OMP END PARALLEL
+          FREE state_average_weight
+          print *,  mpi_rank, ': PT2 done'
+          print *,  '-------'
+
+        endif
+      endif
+
+      IRP_IF MPI
+        call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+        if (ierr /= MPI_SUCCESS) then
+          print *,  irp_here, 'error in barrier'
+        endif
+      IRP_ENDIF
+      call mpi_print('----------')
+
+    endif
+
+  end do
+  IRP_IF MPI
+    call MPI_finalize(ierr)
+  IRP_ENDIF
+end
+
+
+
--- a/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
+++ b/src/cipsi_tc_bi_ortho/stochastic_cipsi.irp.f
@ -0,0 +1,149 @@
+subroutine run_stochastic_cipsi
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Selected Full Configuration Interaction with Stochastic selection and PT2.
+!
+! Alternates a ZMQ_pt2 step (stochastic PT2 and selection) with a call to
+! diagonalize_CI_tc_bi_ortho until N_det reaches N_det_max, the largest
+! |PT2| over the states drops below pt2_max, or qp_stop() returns true.
+! After the loop, ZMQ_pt2 is called once more with to_select = 0, followed
+! by a last diagonalization.
+  END_DOC
+  integer                        :: i,j,k,ndet
+  double precision, allocatable  :: zeros(:)
+  integer                        :: to_select
+  type(pt2_type)                 :: pt2_data, pt2_data_err
+  logical, external              :: qp_stop
+  logical                        :: print_pt2
+
+  double precision :: rss
+  double precision, external :: memory_of_double
+  double precision :: correlation_energy_ratio,E_denom,E_tc,norm
+  double precision, allocatable :: ept2(:), pt1(:),extrap_energy(:)
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  PROVIDE H_apply_buffer_allocated distributed_davidson 
+
+  print*,'Diagonal elements of the Fock matrix '
+  do i = 1, mo_num
+   write(*,*)i,Fock_matrix_tc_mo_tot(i,i)
+  enddo
+  N_iter = 1
+  threshold_generators = 1.d0
+  SOFT_TOUCH threshold_generators
+
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (zeros(N_states))
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+
+  relative_error=PT2_relative_error
+
+  ! Start with a huge |PT2| so that the convergence test of the main loop
+  ! cannot be satisfied before the first PT2 evaluation.
+  zeros = 0.d0
+  pt2_data % pt2   = -huge(1.e0)
+  pt2_data % rpt2  = -huge(1.e0)
+  pt2_data % overlap= 0.d0
+  pt2_data % variance = huge(1.e0)
+
+  !!!! WARNING  !!!! THERE SEEMS TO BE A PROBLEM WITH make_s2_eigenfunction !!!! THE DETERMINANTS CAN APPEAR TWICE IN THE WFT DURING SELECTION
+!  if (s2_eig) then
+!    call make_s2_eigenfunction
+!  endif
+  print_pt2 = .False.
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  call routine_save_right
+
+
+!  if (N_det > N_det_max) then
+!    psi_det(1:N_int,1:2,1:N_det) = psi_det_sorted_tc_gen(1:N_int,1:2,1:N_det)
+!    psi_coef(1:N_det,1:N_states) = psi_coef_sorted_tc_gen(1:N_det,1:N_states)
+!    N_det = N_det_max
+!    soft_touch N_det psi_det psi_coef
+!   if (s2_eig) then
+!     call make_s2_eigenfunction
+!   endif
+!    print_pt2 = .False.
+!    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    call routine_save_right
+!  endif
+
+  ! Per-iteration history, only consumed by the commented-out extrapolation
+  ! code below. The capacity is fixed, see the bound check in the loop.
+  allocate(ept2(1000),pt1(1000),extrap_energy(100))
+
+  correlation_energy_ratio = 0.d0
+
+! thresh_it_dav  = 5.d-5
+! soft_touch thresh_it_dav
+
+  print_pt2 = .True.
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max)          &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    ! Number of determinants requested from the selection, never fewer
+    ! than N_states_diag.
+    to_select = int(sqrt(dble(N_states))*dble(N_det)*selection_factor)
+    to_select = max(N_states_diag, to_select)
+
+    E_denom = E_tc ! TC Energy of the current wave function 
+    call pt2_dealloc(pt2_data)
+    call pt2_dealloc(pt2_data_err)
+    call pt2_alloc(pt2_data, N_states)
+    call pt2_alloc(pt2_data_err, N_states)
+    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error,to_select) ! Stochastic PT2 and selection
+!    stop
+
+    N_iter += 1
+
+    if (qp_stop()) exit
+
+    ! Add selected determinants
+    call copy_H_apply_buffer_to_wf_tc()
+
+    PROVIDE  psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted_tc
+
+    ! ept2 and pt1 have a fixed capacity while the number of iterations is
+    ! not bounded: skip the bookkeeping instead of writing out of bounds.
+    if (N_iter-1 <= size(ept2)) then
+      ept2(N_iter-1) = E_tc + nuclear_repulsion + (pt2_data % pt2(1))/norm
+      pt1(N_iter-1) = dsqrt(pt2_data % overlap(1,1))
+    endif
+    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+    if (qp_stop()) exit
+  enddo
+!  print*,'data to extrapolate '
+!  do i = 2, N_iter
+!   print*,'iteration ',i
+!   print*,'pt1,Ept2',pt1(i),ept2(i)
+!   call get_extrapolated_energy(i-1,ept2(i),pt1(i),extrap_energy(i))
+!   do j = 2, i
+!    print*,'j,e,energy',j,extrap_energy(j)
+!   enddo
+!  enddo
+
+! thresh_it_dav  = 5.d-6
+! soft_touch thresh_it_dav
+
+  ! Final PT2 evaluation with to_select = 0, then a last diagonalization.
+  call pt2_dealloc(pt2_data)
+  call pt2_dealloc(pt2_data_err)
+  call pt2_alloc(pt2_data, N_states)
+  call pt2_alloc(pt2_data_err, N_states)
+  call ZMQ_pt2(E_tc, pt2_data, pt2_data_err, relative_error,0) ! Stochastic PT2 and selection
+  call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  if (.not.qp_stop()) then
+!    if (N_det < N_det_max) then
+!     thresh_it_dav  = 5.d-7
+!     soft_touch thresh_it_dav
+!     call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!    endif
+!
+!    call pt2_dealloc(pt2_data)
+!    call pt2_dealloc(pt2_data_err)
+!    call pt2_alloc(pt2_data, N_states)
+!    call pt2_alloc(pt2_data_err, N_states)
+!    call ZMQ_pt2(E_denom, pt2_data, pt2_data_err, relative_error, 0) ! Stochastic PT2
+!    call diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+!  endif
+!  call pt2_dealloc(pt2_data)
+!  call pt2_dealloc(pt2_data_err)
+!  call routine_save_right
+
+end
+
--- a/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
+++ b/src/cipsi_tc_bi_ortho/zmq_selection.irp.f
@ -0,0 +1,235 @@
+subroutine ZMQ_selection(N_in, pt2_data)
+  use f77_zmq
+  use selection_types
+
+  implicit none
+  BEGIN_DOC
+  ! Runs the 'selection' parallel job: publishes the wave function and the
+  ! selection parameters on the ZMQ server, queues one task per
+  ! (generator, fragment) pair, then runs one collector thread and the
+  ! in-process slave threads.
+  !
+  ! N_in     : requested number of determinants. When N_in > 0 the content
+  !            of the selection buffer is passed to
+  !            fill_H_apply_buffer_no_selection.
+  !
+  ! pt2_data : accumulated by the collector, then rescaled by f(k)
+  !            (f = 1 when do_pt2 is true).
+  END_DOC
+
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket , zmq_socket_pull
+  integer, intent(in)            :: N_in
+  type(selection_buffer)         :: b
+  integer                        :: i, l, N
+  integer, external              :: omp_get_thread_num
+  type(pt2_type), intent(inout)  :: pt2_data
+
+  PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators
+
+  N = max(N_in,1)
+  ! The bound is evaluated in double precision: the squared product
+  ! overflows default integers for large numbers of electrons and orbitals.
+  ! The result never exceeds the current N, so the conversion back is safe.
+  N = int(min(dble(N), (dble(elec_alpha_num) * dble(mo_num-elec_alpha_num))**2))
+  if (.True.) then
+    PROVIDE pt2_e0_denominator nproc
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_tc_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order selection_weight pseudo_sym
+    PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
+    PROVIDE excitation_beta_max  excitation_alpha_max excitation_max
+
+    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection')
+
+    integer, external              :: zmq_put_psi
+    integer, external              :: zmq_put_N_det_generators
+    integer, external              :: zmq_put_N_det_selectors
+    integer, external              :: zmq_put_dvector
+
+    ! Publish everything the slaves need to rebuild the problem.
+    if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+      stop 'Unable to put psi on ZMQ server'
+    endif
+    if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_generators on ZMQ server'
+    endif
+    if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_selectors on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+      stop 'Unable to put energy on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+      stop 'Unable to put state_average_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'selection_weight',selection_weight,N_states) == -1) then
+      stop 'Unable to put selection_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',(/threshold_generators/),1) == -1) then
+      stop 'Unable to put threshold_generators on ZMQ server'
+    endif
+    call create_selection_buffer(N, N*2, b)
+  endif
+
+  integer, external :: add_task_to_taskserver
+  character(len=100000)           :: task
+  integer :: j,k,ipos
+  ipos=1
+  task = ' '
+
+  ! Tasks are packed in a single string as 30-character records 'j i N|'
+  ! and flushed to the task server each time the string is nearly full.
+  do i= 1, N_det_generators
+    do j=1,pt2_F(i)
+      write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N
+      ipos += 30
+      if (ipos > 100000-30) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+        ipos=1
+      endif
+    end do
+  enddo
+  if (ipos > 1) then
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+      stop 'Unable to add task to task server'
+    endif
+  endif
+  N = max(N_in,1)
+
+
+  ASSERT (associated(b%det))
+  ASSERT (associated(b%val))
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  ! Number of in-process slave threads.
+  integer :: nproc_target
+  if (N_det < 3*nproc) then
+    nproc_target = N_det/4
+  else
+    nproc_target = nproc
+  endif
+  double precision :: mem
+  mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 +  3.d0 + 5.d0) / (1024.d0**3)
+  call write_double(6,mem,'Estimated memory/thread (Gb)')
+  if (qp_max_mem > 0) then
+    nproc_target = max(1,int(dble(qp_max_mem)/(0.1d0 + mem)))
+    nproc_target = min(nproc_target,nproc)
+  endif
+
+  ! Rescaling factor of each state: 1 / <psi_selectors|psi_selectors> when
+  ! do_pt2 is false, 1 otherwise.
+  double precision :: f(N_states), u_dot_u
+  f(:) = 1.d0
+  if (.not.do_pt2) then
+    do k=1,min(N_det,N_states)
+     f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+    enddo
+  endif
+
+  ! Thread 0 collects the results, the other threads are slaves.
+  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2_data)  PRIVATE(i) NUM_THREADS(nproc_target+1)
+  i = omp_get_thread_num()
+  if (i==0) then
+    call selection_collector(zmq_socket_pull, b, N, pt2_data)
+  else
+    call selection_slave_inproc(i)
+  endif
+  !$OMP END PARALLEL
+
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection')
+  if (N_in > 0) then
+    if (s2_eig) then
+      call make_selection_buffer_s2(b)
+    endif
+    call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0)
+  endif
+  call delete_selection_buffer(b)
+
+  do k=1,N_states
+    pt2_data % pt2(k) = pt2_data % pt2(k) * f(k)
+    pt2_data % variance(k) = pt2_data % variance(k) * f(k)
+    ! Each overlap element is rescaled exactly once. Updating both (k,l)
+    ! and (l,k) inside this double loop would apply the factor twice.
+    do l=1,N_states
+      pt2_data % overlap(l,k) = pt2_data % overlap(l,k) * dsqrt(f(k)*f(l))
+    enddo
+
+    pt2_data % rpt2(k) =  &
+       pt2_data % pt2(k)/(1.d0 + pt2_data % overlap(k,k))
+  enddo
+
+  pt2_overlap(:,:) = pt2_data % overlap(:,:)
+
+  print *, 'Overlap of perturbed states:'
+  do l=1,N_states
+    print *, pt2_overlap(l,:)
+  enddo
+  print *, '-------'
+  SOFT_TOUCH pt2_overlap
+  call update_pt2_and_variance_weights(pt2_data, N_states)
+
+end subroutine
+
+
+subroutine selection_slave_inproc(i)
+  implicit none
+  BEGIN_DOC
+  ! Entry point of a selection slave thread: forwards the thread index i
+  ! and pt2_e0_denominator to run_selection_slave, with 1 as first argument.
+  END_DOC
+  integer, intent(in)            :: i
+
+  call run_selection_slave(1, i, pt2_e0_denominator)
+end subroutine
+
+subroutine selection_collector(zmq_socket_pull, b, N, pt2_data)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Collects the results pushed on zmq_socket_pull until no task is left.
+  !
+  ! b        : selection buffer receiving the pulled determinants, sorted
+  !            on exit
+  !
+  ! N        : size used to create the temporary receive buffer
+  !
+  ! pt2_data : pt2, variance and overlap are zeroed, then the pulled
+  !            contributions are added with pt2_add
+  END_DOC
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in)            :: N
+  type(pt2_type), intent(inout)  :: pt2_data
+
+  type(pt2_type)                 :: pt2_data_tmp
+  type(selection_buffer)         :: b2
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer, external              :: zmq_delete_task
+  double precision, external     :: memory_of_int
+  integer, allocatable           :: task_id(:)
+  integer                        :: more, i, ntask
+  double precision               :: rss
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N, N*2, b2)
+
+  rss = memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+  allocate(task_id(N_det_generators))
+
+  more = 1
+  pt2_data % pt2(:)      = 0d0
+  pt2_data % variance(:) = 0.d0
+  pt2_data % overlap(:,:) = 0.d0
+  call pt2_alloc(pt2_data_tmp,N_states)
+
+  ! 'more' is handed to zmq_delete_task; the loop runs as long as it
+  ! stays equal to 1.
+  do while (more == 1)
+    call pull_selection_results(zmq_socket_pull, pt2_data_tmp, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
+
+    call pt2_add(pt2_data, 1.d0, pt2_data_tmp)
+    do i=1, b2%cur
+      call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      ! Stop scanning this batch at the first value above b%mini.
+      if (b2%val(i) > b%mini) exit
+    end do
+
+    do i=1, ntask
+      if(task_id(i) == 0) then
+          print *,  "Error in collector"
+      endif
+      if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then
+        stop 'Unable to delete task'
+      endif
+    end do
+  end do
+
+  call pt2_dealloc(pt2_data_tmp)
+  deallocate(task_id)
+
+  call delete_selection_buffer(b2)
+  call sort_selection_buffer(b)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+end subroutine
+
--- a/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f
+++ b/src/dav_general_mat/dav_diag_dressed_ext_rout_nonsym_B1space.irp.f
@ -0,0 +1,500 @@
+
+! ---
+
+subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space(u_in, H_jj, Dress_jj,energies, sze, N_st, N_st_diag_in, converged, hcalc)
+
+  use mmap_module
+
+  BEGIN_DOC
+  ! Generic modified-Davidson diagonalization with a diagonal dressing
+  !
+  ! H_jj : diagonal matrix elements of H, used as denominators of the
+  !        Davidson correction step
+  !
+  ! Dress_jj : diagonal dressing, added to W = H U through dress_calc
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors
+  !
+  ! energies : on exit, the first N_st eigenvalues of the last diagonalization
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed > N_st
+  !
+  ! converged : set to true when the largest residual norm of the N_st states
+  !             is below threshold_nonsym_davidson, or when qp_stop() is true
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  !
+  ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output)
+  END_DOC
+
+  implicit none
+
+  integer,           intent(in)   :: sze, N_st, N_st_diag_in
+  double precision,  intent(in)   :: H_jj(sze),Dress_jj(sze)
+  logical,          intent(inout) :: converged
+  double precision, intent(inout) :: u_in(sze,N_st_diag_in)
+  double precision, intent(out)   :: energies(N_st)
+  external                           hcalc
+
+  character*(16384)               :: write_buffer
+  integer                         :: iter, N_st_diag
+  integer                         :: i, j, k, l, m
+  integer                         :: itertot
+  logical                         :: disk_based
+  integer                         :: shift, shift2, itermax
+  integer                         :: nproc_target
+  integer                         :: maxab
+  double precision                :: to_print(2,N_st)
+  double precision                :: r1, r2
+  double precision                :: rss
+  double precision, allocatable   :: U(:,:)
+  double precision, allocatable   :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable   :: residual_norm(:)
+
+  double precision                :: lambda_tmp
+  integer,          allocatable   :: i_omax(:)
+  double precision, allocatable   :: overlap(:)
+
+  double precision, allocatable :: W(:,:)
+  !double precision, pointer       :: W(:,:)
+  double precision, external      :: u_dot_v, u_dot_u
+  logical, external               :: qp_stop
+
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in 
+
+  ! Sanity check of the first guess vector: abort on NaN, flush tiny values
+  ! to zero.
+   do i = 1, sze
+    if(isnan(u_in(i,1)))then
+     print*,'pb in input vector of davidson_general_diag_dressed_ext_rout_nonsym_b1space'
+     print*,i,u_in(i,1)
+     stop
+    else if (dabs(u_in(i,1)).lt.1.d-16)then
+     u_in(i,1) = 0.d0
+    endif
+   enddo
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if(N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1
+
+  provide threshold_nonsym_davidson 
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  maxab = sze 
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag*itermax)          &! U
+         + 1.d0*dble(sze*m)*(N_st_diag*itermax)   &! W
+         + 2.d0*(N_st_diag*itermax)**2            &! h,y
+         + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_S2_u_0_nstates_zmq
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+         + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if(nproc_target == 0) then
+      call check_mem(r1, irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if(r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    ! Too large: first shrink the subspace, then try the disk-based
+    ! estimate, and finally reduce the number of threads.
+    if(itermax > 4) then
+      itermax = itermax - 1
+    else if (m==1.and.disk_based_davidson) then
+      m = 0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+
+  call write_int(6, N_st, 'Number of states')
+  call write_int(6, N_st_diag, 'Number of states in diagonalization')
+  call write_int(6, sze, 'Number of basis functions')
+  call write_int(6, nproc_target, 'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if(disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy         Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+  ! ---
+
+
+  allocate( W(sze,N_st_diag*itermax) )
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag*itermax),                                      &
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      lambda(N_st_diag*itermax),                                     & 
+      residual_norm(N_st_diag),                                      &
+      i_omax(N_st)                                                   &
+  )
+
+  U = 0.d0
+  h = 0.d0
+  y = 0.d0
+  lambda = 0.d0
+  residual_norm = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+
+  ! Initialize from N_st to N_st_diag with gaussian random numbers
+  ! to be sure to have overlap with any eigenvectors
+  do k = N_st+1, N_st_diag
+    ! NOTE(review): this store is overwritten by the loop just below, which
+    ! also runs over i = k.
+    u_in(k,k) = 10.d0
+    do i = 1, sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
+  enddo
+  ! Normalize all states 
+  do k = 1, N_st_diag
+    call normalize(u_in(1,k), sze)
+  enddo
+
+  ! Copy from the guess input "u_in" to the working vectors "U"
+  do k = 1, N_st_diag
+    do i = 1, sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+  ! ---
+
+  itertot = 0
+
+  do while (.not.converged)
+
+    ! At most 7 restarts of the subspace are performed.
+    itertot = itertot + 1
+    if(itertot == 8) then
+      exit
+    endif
+
+    do iter = 1, itermax-1
+
+      shift  = N_st_diag * (iter-1)
+      shift2 = N_st_diag * iter
+
+      if( (iter > 1) .or. (itertot == 1) ) then
+
+        ! Gram-Schmidt to orthogonalize all new guess with the previous vectors 
+        call ortho_qr(U, size(U, 1), sze, shift2)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+
+        ! W = H U
+        call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+        call dress_calc(W(1,shift+1), Dress_jj, U(1,shift+1), N_st_diag, sze)
+
+      else
+
+        ! Already computed in update below
+        continue
+      endif
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+      call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 &
+                , U, size(U, 1), W, size(W, 1)        &
+                , 0.d0, h, size(h, 1) )
+
+
+      ! Diagonalize h y = lambda y
+      ! ---------------------------
+      call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1))
+
+
+      ! Express eigenvectors of h in the determinant basis:
+      ! ---------------------------------------------------
+
+      ! y(:,k) = rk
+      ! U(:,k) = Bk 
+      ! U(:,shift2+k) = Rk = Bk x rk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , U, size(U, 1), y, size(y, 1)           & 
+                , 0.d0, U(1,shift2+1), size(U, 1) )
+
+      do k = 1, N_st_diag
+        call normalize(U(1,shift2+k), sze)
+      enddo
+
+      ! ---
+      ! select the max overlap
+
+      !
+      ! start test ------------------------------------------------------------------------
+      !
+      !double precision, allocatable :: Utest(:,:), Otest(:)
+      !allocate( Utest(sze,shift2), Otest(shift2) )
+
+      !call dgemm( 'N', 'N', sze, shift2, shift2, 1.d0 &
+      !          , U, size(U, 1), y, size(y, 1), 0.d0, Utest(1,1), size(Utest, 1) )
+      !do k = 1, shift2
+      !  call normalize(Utest(1,k), sze)
+      !enddo
+      !do j = 1, sze
+      !  write(455, '(100(1X, F16.10))') (Utest(j,k), k=1,shift2)
+      !enddo
+
+      !do k = 1, shift2 
+      !  Otest(k) = 0.d0
+      !  do i = 1, sze
+      !    Otest(k) += Utest(i,k) * u_in(i,1)
+      !  enddo
+      !  Otest(k) = dabs(Otest(k))
+      !  print *, ' Otest =', k, Otest(k), lambda(k)
+      !enddo
+     
+      !deallocate(Utest, Otest)
+      !
+      ! end test ------------------------------------------------------------------------
+      !
+
+      ! TODO 
+      ! state_following is more efficient
+      ! For each target state, find the new vector with the largest overlap
+      ! with the input guess and abort if that overlap is below 0.7.
+      do l = 1, N_st
+
+        allocate( overlap(N_st_diag) )
+
+        do k = 1, N_st_diag
+          overlap(k) = 0.d0
+          do i = 1, sze
+            overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l)
+          enddo
+          overlap(k) = dabs(overlap(k))
+          !print *, ' overlap =', k, overlap(k)
+        enddo
+
+        lambda_tmp = 0.d0 
+        do k = 1, N_st_diag
+          if(overlap(k) .gt. lambda_tmp) then 
+            i_omax(l)  = k
+            lambda_tmp = overlap(k)
+          endif
+        enddo
+
+        deallocate(overlap)
+
+        if(lambda_tmp .lt. 0.7d0) then
+          print *, ' very small overlap ...', l, i_omax(l)
+          print *, ' max overlap = ', lambda_tmp
+          stop
+        endif
+
+        if(i_omax(l) .ne. l) then
+          print *, ' !!! WARNONG !!!'
+          print *, ' index of state', l, i_omax(l)
+        endif
+      enddo
+
+      ! y(:,k) = rk
+      ! W(:,k) = H x Bk 
+      ! W(:,shift2+k) = H x Bk x rk
+      !               = Wk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , W, size(W, 1), y, size(y, 1)           &
+                , 0.d0, W(1,shift2+1), size(W, 1) )
+
+      ! ---
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+
+      ! l is assigned inside the loop body: it has to be private, otherwise
+      ! the threads overwrite each other's state index.
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k,l)
+      do k = 1, N_st_diag
+        do i = 1, sze
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2)
+        enddo
+        if(k <= N_st) then
+          l = k
+          residual_norm(k) = u_dot_u(U(1,shift2+l), sze)
+          to_print(1,k)    = lambda(l) 
+          to_print(2,k)    = residual_norm(l)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+      !residual_norm(1) = u_dot_u(U(1,shift2+1), sze)
+      !to_print(1,1) = lambda(1) 
+      !to_print(2,1) = residual_norm(1)
+
+
+      if( (itertot > 1) .and. (iter == 1) ) then
+        !don't print 
+        continue
+      else
+        write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if(iter > 1) then
+        converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson
+      endif   
+      
+      do k = 1, N_st
+        if(residual_norm(k) > 1.e8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if(converged) then
+        exit
+      endif
+
+      if(qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+    enddo ! loop over iter
+
+
+    ! Re-contract U and update W
+    ! --------------------------------
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0  &
+              , W, size(W, 1), y, size(y, 1)            &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , U, size(U, 1), y, size(y, 1)           &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    ! If the orthogonalization flipped the sign of a vector (checked on its
+    ! first non-zero component), flip the matching column of W as well.
+    do j = 1, N_st_diag
+      k = 1
+      do while( (k < sze) .and. (U(k,j) == 0.d0) )
+        k = k+1
+      enddo
+      if(U(k,j) * u_in(k,j) < 0.d0) then
+        do i = 1, sze
+          W(i,j) = -W(i,j)
+        enddo
+      endif
+    enddo
+
+  enddo ! loop over while
+
+  ! ---
+
+  do k = 1, N_st
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  deallocate(W)
+  deallocate(U, h, y, lambda, residual_norm, i_omax)
+
+  FREE nthreads_davidson
+
+end subroutine davidson_general_diag_dressed_ext_rout_nonsym_b1space
+
+! ---
+
+subroutine dress_calc(v,dress,u,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Adds the action of the diagonal dressing on u to v :
+  ! v(i,istate) = v(i,istate) + dress(i) * u(i,istate)
+  !
+  ! WARNING :: v is not initialized here, the contribution is accumulated
+  ! into its incoming values !!!
+  END_DOC
+  integer, intent(in)              :: N_st,sze
+  double precision, intent(in)     :: u(sze,N_st),dress(sze)
+  double precision, intent(inout)  :: v(sze,N_st)
+  integer :: kstate
+
+  ! One whole column per state.
+  do kstate = 1, N_st
+    v(1:sze,kstate) = v(1:sze,kstate) + dress(1:sze) * u(1:sze,kstate)
+  enddo
+end
+
+
+
+
+
+
--- a/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
+++ b/src/dav_general_mat/dav_ext_rout_nonsym_B1space.irp.f
@ -0,0 +1,473 @@
+
+! ---
+
+subroutine davidson_general_ext_rout_nonsym_b1space(u_in, H_jj, energies, sze, N_st, N_st_diag_in, converged, hcalc)
+
+  use mmap_module
+
+  BEGIN_DOC
+  ! Generic modified-Davidson diagonalization (right eigenvectors of a
+  ! matrix that is only known through the hcalc routine)
+  !
+  ! H_jj : diagonal matrix elements of H, used in the denominator of the Davidson step
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit by right eigenvectors
+  !
+  ! energies : on exit, lambda(1:N_st) of the last small diagonalization
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Assumed >= N_st
+  !
+  ! converged : reset to .False. on entry; .True. on exit if the largest residual
+  !             of the N_st states is below threshold_nonsym_davidson, or if qp_stop()
+  !             requested a stop
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  !
+  ! hcalc subroutine to compute W = H U (see routine hcalc_template for template of input/output)
+  !       It is called as : call hcalc(W, U, N_st_diag, sze)
+  END_DOC
+
+  implicit none
+
+  integer,           intent(in)   :: sze, N_st, N_st_diag_in
+  double precision,  intent(in)   :: H_jj(sze)
+  logical,          intent(inout) :: converged
+  double precision, intent(inout) :: u_in(sze,N_st_diag_in)
+  double precision, intent(out)   :: energies(N_st)
+  external                           hcalc
+
+  character*(16384)               :: write_buffer
+  integer                         :: iter, N_st_diag
+  integer                         :: i, j, k, l, m
+  integer                         :: itertot
+  logical                         :: disk_based
+  integer                         :: shift, shift2, itermax
+  integer                         :: nproc_target
+  integer                         :: maxab
+  double precision                :: to_print(2,N_st)
+  double precision                :: r1, r2
+  double precision                :: rss
+  double precision                :: lambda_tmp
+  double precision, allocatable   :: U(:,:)
+  double precision, allocatable   :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable   :: residual_norm(:)
+  integer,          allocatable   :: i_omax(:)
+  double precision, allocatable   :: overlap(:)
+  double precision, allocatable   :: W(:,:)
+  double precision, external      :: u_dot_v, u_dot_u
+  logical, external               :: qp_stop
+
+
+  include 'constants.include.F'
+
+  N_st_diag = N_st_diag_in
+
+  ! Sanity check of the first guess vector : abort on NaN, flush tiny values to zero
+  do i = 1, sze
+    if(isnan(u_in(i,1)))then
+      print*,'pb in input vector of davidson_general_ext_rout_nonsym_b1space'
+      print*,i,u_in(i,1)
+      stop -1
+    else if (dabs(u_in(i,1)).lt.1.d-16)then
+      u_in(i,1) = 0.d0
+    endif
+  enddo
+
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if(N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1
+
+  provide threshold_nonsym_davidson
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+  ! itermax is reduced first (down to 4), then the number of threads,
+  ! until the estimated memory r1 (in Gb) plus the resident memory fits in qp_max_mem
+
+  nproc_target = nproc
+  maxab = sze
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag*itermax)          &! U
+         + 1.d0*dble(sze*m)*(N_st_diag*itermax)   &! W
+         + 2.d0*(N_st_diag*itermax)**2            &! h,y
+         + 2.d0*(N_st_diag*itermax)               &! s2,lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_S2_u_0_nstates_zmq
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on collector
+         + 3.d0*(N_st_diag*N_det)                 &! u_t, v_t, s_t on slave
+         + 0.5d0*maxab                            &! idx0 in H_S2_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if(nproc_target == 0) then
+      call check_mem(r1, irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if(r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if(itermax > 4) then
+      itermax = itermax - 1
+!    else if (m==1.and.disk_based_davidson) then
+!      m = 0
+!      disk_based = .True.
+!      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+
+  call write_int(6, N_st, 'Number of states')
+  call write_int(6, N_st_diag, 'Number of states in diagonalization')
+  call write_int(6, sze, 'Number of basis functions')
+  call write_int(6, nproc_target, 'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if(disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  !---------------
+
+  ! Header of the iteration table
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'       Energy         Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+  ! ---
+
+
+  allocate( W(sze,N_st_diag*itermax) )
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag*itermax),                                      &
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      lambda(N_st_diag*itermax),                                     &
+      residual_norm(N_st_diag),                                      &
+      i_omax(N_st),                                                  &
+      overlap(N_st_diag)                                             &
+  )
+
+  U = 0.d0
+  h = 0.d0
+  y = 0.d0
+  lambda = 0.d0
+  residual_norm = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+
+  ! Initialize from N_st to N_st_diag with gaussian random numbers
+  ! to be sure to have overlap with any eigenvectors.
+  ! (The former "u_in(k,k) = 10.d0" placed before the loop was always
+  ! overwritten by the loop itself, so it has been dropped.)
+  do k = N_st+1, N_st_diag
+    do i = 1, sze
+      call random_number(r1)
+      call random_number(r2)
+      ! random_number can return exactly 0 : clamp to keep dlog finite
+      r1 = dsqrt(-2.d0*dlog(max(r1, tiny(1.d0))))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
+  enddo
+  ! Normalize all states
+  do k = 1, N_st_diag
+    call normalize(u_in(1,k), sze)
+  enddo
+
+  ! Copy from the guess input "u_in" to the working vectors "U"
+  do k = 1, N_st_diag
+    do i = 1, sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+  ! ---
+
+  itertot = 0
+
+  do while (.not.converged)
+
+    ! At most 7 restarts of the subspace are performed
+    itertot = itertot + 1
+    if(itertot == 8) then
+      exit
+    endif
+
+    do iter = 1, itermax-1
+
+      shift  = N_st_diag * (iter-1)
+      shift2 = N_st_diag * iter
+
+      if( (iter > 1) .or. (itertot == 1) ) then
+
+        ! Orthogonalize all new guess with the previous vectors (done twice)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+
+        ! W = H U
+        call hcalc(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+
+      else
+
+        ! Already computed in update below
+        continue
+      endif
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+      call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 &
+                , U, size(U, 1), W, size(W, 1)        &
+                , 0.d0, h, size(h, 1) )
+
+
+      ! Diagonalize h y = lambda y
+      ! ---------------------------
+      call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1))
+
+
+      ! Express eigenvectors of h in the determinant basis:
+      ! ---------------------------------------------------
+
+      ! y(:,k) = rk
+      ! U(:,k) = Bk
+      ! U(:,shift2+k) = Rk = Bk x rk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , U, size(U, 1), y, size(y, 1)           &
+                , 0.d0, U(1,shift2+1), size(U, 1) )
+
+      do k = 1, N_st_diag
+        call normalize(U(1,shift2+k), sze)
+      enddo
+
+      ! ---
+      ! select the max overlap
+
+      ! TODO
+      ! state_following is more efficient
+      do l = 1, N_st
+
+        do k = 1, N_st_diag
+          overlap(k) = 0.d0
+          do i = 1, sze
+            overlap(k) = overlap(k) + U(i,shift2+k) * u_in(i,l)
+          enddo
+          overlap(k) = dabs(overlap(k))
+        enddo
+
+        ! 0 means "no vector with a non-zero overlap was found"
+        i_omax(l)  = 0
+        lambda_tmp = 0.d0
+        do k = 1, N_st_diag
+          if(overlap(k) .gt. lambda_tmp) then
+            i_omax(l)  = k
+            lambda_tmp = overlap(k)
+          endif
+        enddo
+
+        if(lambda_tmp .lt. 0.7d0) then
+          print *, ' very small overlap ...', l, i_omax(l)
+          print *, ' max overlap = ', lambda_tmp
+          stop -1
+        endif
+
+        if(i_omax(l) .ne. l) then
+          print *, ' !!! WARNONG !!!'
+          print *, ' index of state', l, i_omax(l)
+        endif
+      enddo
+
+      ! y(:,k) = rk
+      ! W(:,k) = H x Bk
+      ! W(:,shift2+k) = H x Bk x rk
+      !               = Wk
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , W, size(W, 1), y, size(y, 1)           &
+                , 0.d0, W(1,shift2+1), size(W, 1) )
+
+      ! ---
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+      ! Only i and k are written inside the parallel loop : no other scalar
+      ! may be assigned here, it would be shared between the threads.
+
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k = 1, N_st_diag
+        do i = 1, sze
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2)
+        enddo
+        if(k <= N_st) then
+          residual_norm(k) = u_dot_u(U(1,shift2+k), sze)
+          to_print(1,k)    = lambda(k)
+          to_print(2,k)    = residual_norm(k)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+
+
+      if( (itertot > 1) .and. (iter == 1) ) then
+        !don't print
+        continue
+      else
+        write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, F16.10, 1X, F16.10))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence
+      if(iter > 1) then
+        converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson
+      endif
+
+      do k = 1, N_st
+        if(residual_norm(k) > 1.e8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if(converged) then
+        exit
+      endif
+
+      if(qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+    enddo ! loop over iter
+
+
+    ! Re-contract U and update W
+    ! --------------------------------
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0  &
+              , W, size(W, 1), y, size(y, 1)            &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , U, size(U, 1), y, size(y, 1)           &
+              , 0.d0, u_in, size(u_in, 1) )
+    do k = 1, N_st_diag
+      do i = 1, sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+    call ortho_qr(U, size(U, 1), sze, N_st_diag)
+
+    ! If the orthogonalization flipped the sign of a vector (checked on its
+    ! first non-zero component), flip the corresponding W as well
+    do j = 1, N_st_diag
+      k = 1
+      do while( (k < sze) .and. (U(k,j) == 0.d0) )
+        k = k+1
+      enddo
+      if(U(k,j) * u_in(k,j) < 0.d0) then
+        do i = 1, sze
+          W(i,j) = -W(i,j)
+        enddo
+      endif
+    enddo
+
+  enddo ! loop over while
+
+  ! ---
+
+  do k = 1, N_st
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================  ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  deallocate(W)
+  deallocate(U, h, y, lambda, residual_norm, i_omax, overlap)
+
+  FREE nthreads_davidson
+
+end subroutine davidson_general_ext_rout_nonsym_b1space
+
+! ---
--- a/src/davidson/EZFIO.cfg
+++ b/src/davidson/EZFIO.cfg
@ -4,6 +4,12 @@ doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
 interface: ezfio,provider,ocaml
 default: 1.e-10

+[threshold_nonsym_davidson]
+type: Threshold
+doc: Thresholds of non-symmetric Davidson's algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
 [threshold_davidson_from_pt2]
 type: logical
 doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2
--- a/src/determinants/determinants.irp.f
+++ b/src/determinants/determinants.irp.f
@ -589,6 +589,67 @@ subroutine save_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psicoef)
  endif
 end

+subroutine save_wavefunction_general_unormalized(ndet,nstates,psidet,dim_psicoef,psicoef)
+  implicit none
+  BEGIN_DOC
+  !  Saves the wave function into the |EZFIO| file.
+  !  The coefficients are written exactly as received in psicoef : no
+  !  normalization is applied in this routine.
+  !  Nothing is done unless mpi_master is true.
+  END_DOC
+  use bitmasks
+  include 'constants.include.F'
+  integer, intent(in)            :: ndet,nstates,dim_psicoef
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,ndet)
+  double precision, intent(in)   :: psicoef(dim_psicoef,nstates)
+
+  integer*8, allocatable         :: dets_i8(:,:,:)
+  double precision, allocatable  :: coef_buffer(:,:)
+  integer                        :: idet, ispin, iint, istate
+  integer                        :: ndet_qp_edit
+
+  if (mpi_master) then
+
+    ! Number of determinants exposed to qp_edit
+    ndet_qp_edit = min(ndet,N_det_qp_edit)
+
+    ! Scalars
+    call ezfio_set_determinants_N_int(N_int)
+    call ezfio_set_determinants_bit_kind(bit_kind)
+    call ezfio_set_determinants_N_det(ndet)
+    call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+    call ezfio_set_determinants_n_states(nstates)
+    call ezfio_set_determinants_mo_label(mo_label)
+
+    ! Determinants, converted to 8-byte integers
+    allocate (dets_i8(N_int,2,ndet))
+    do idet = 1, ndet
+      do ispin = 1, 2
+        do iint = 1, N_int
+          dets_i8(iint,ispin,idet) = transfer(psidet(iint,ispin,idet),1_8)
+        enddo
+      enddo
+    enddo
+    call ezfio_set_determinants_psi_det(dets_i8)
+    call ezfio_set_determinants_psi_det_qp_edit(dets_i8)
+    deallocate (dets_i8)
+
+    ! Full set of coefficients
+    allocate (coef_buffer(ndet,nstates))
+    do istate = 1, nstates
+      coef_buffer(1:ndet,istate) = psicoef(1:ndet,istate)
+    enddo
+    call ezfio_set_determinants_psi_coef(coef_buffer)
+    deallocate (coef_buffer)
+
+    ! Truncated set of coefficients for qp_edit
+    allocate (coef_buffer(ndet_qp_edit,nstates))
+    do istate = 1, nstates
+      coef_buffer(1:ndet_qp_edit,istate) = psicoef(1:ndet_qp_edit,istate)
+    enddo
+    call ezfio_set_determinants_psi_coef_qp_edit(coef_buffer)
+    deallocate (coef_buffer)
+
+    call write_int(6,ndet,'Saved determinants')
+  endif
+end


 subroutine save_wavefunction_specified(ndet,nstates,psidet,psicoef,ndetsave,index_det_save)
--- a/src/fci_tc_bi/13.fci_tc_bi_ortho.bats
+++ b/src/fci_tc_bi/13.fci_tc_bi_ortho.bats
@ -0,0 +1,26 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+# Runs fci_tc_bi_ortho on the O_tc_scf EZFIO directory and compares the last
+# 'E(before) +rPT2' value found in the output with the reference energy.
+function run_O() {
+  qp set_file O_tc_scf 
+  FILE=O_tc_scf/tc_bi_ortho/psi_l_coef_bi_ortho.gz
+  # If a left-coefficient file exists, delete the psi* files of tc_bi_ortho
+  # (presumably so that the run does not reuse a stored wave function)
+  if test -f "$FILE"; then
+    rm O_tc_scf/tc_bi_ortho/psi*
+  fi
+  qp set determinants n_det_max 20000
+  file=${EZFIO_FILE}.fci_tc_bi_ortho.out
+  qp run fci_tc_bi_ortho | tee $file
+  eref=-74.971188861115309
+  # Keep the text after '=' on the last matching line of the output
+  energy="$(grep 'E(before) +rPT2   =' $file | tail -1 | cut -d '=' -f 2)"
+  # eq is not defined in this file (presumably provided by common.bats.sh);
+  # 1e-4 is presumably the accepted tolerance -- confirm
+  eq $energy $eref 1e-4
+}
+
+
+@test "O" {
+ run_O 
+}
+
+
--- a/src/fci_tc_bi/EZFIO.cfg
+++ b/src/fci_tc_bi/EZFIO.cfg
@ -0,0 +1,17 @@
+[energy]
+type: double precision
+doc: Calculated Selected |FCI| energy
+interface: ezfio
+size: (determinants.n_states)
+
+[energy_pt2]
+type: double precision
+doc: Calculated |FCI| energy + |PT2|
+interface: ezfio
+size: (determinants.n_states)
+
+[cipsi_tc]
+type: character*(32)
+doc: TODO
+interface: ezfio,provider,ocaml
+default: h_tc
--- a/src/fci_tc_bi/NEED
+++ b/src/fci_tc_bi/NEED
@ -0,0 +1,3 @@
+tc_bi_ortho
+davidson_undressed
+cipsi_tc_bi_ortho
--- a/src/fci_tc_bi/class.irp.f
+++ b/src/fci_tc_bi/class.irp.f
@ -0,0 +1,12 @@
+ BEGIN_PROVIDER [ logical, do_only_1h1p ]
+&BEGIN_PROVIDER [ logical, do_only_cas  ]
+&BEGIN_PROVIDER [ logical, do_ddci ]
+ implicit none
+ BEGIN_DOC
+ ! Flags do_only_1h1p, do_only_cas and do_ddci.
+ ! In the FCI case none of these restrictions applies : the three flags
+ ! are always .False.
+ END_DOC
+ do_ddci      = .False.
+ do_only_cas  = .False.
+ do_only_1h1p = .False.
+END_PROVIDER
+
--- a/src/fci_tc_bi/copy_wf.irp.f
+++ b/src/fci_tc_bi/copy_wf.irp.f
@ -0,0 +1,215 @@
+
+use bitmasks
+
+subroutine copy_H_apply_buffer_to_wf_tc
+  use omp_lib
+  implicit none
+  BEGIN_DOC
+! Appends the determinants stored in the H_apply buffers to psi_det and
+! extends psi_r_coef_bi_ortho / psi_l_coef_bi_ortho accordingly : the new
+! right coefficients are taken from the buffers, the new left coefficients
+! are set to zero.
+! Duplicates are then removed and the left/right coefficients are passed to
+! bi_normalize. N_det, psi_det and both coefficient arrays are SOFT_TOUCHed
+! inside this subroutine.
+  END_DOC
+  integer(bit_kind), allocatable :: buffer_det(:,:,:)
+  double precision, allocatable  :: buffer_r_coef(:,:), buffer_l_coef(:,:)
+  integer                        :: i,j,k
+  integer                        :: N_det_old
+
+  PROVIDE H_apply_buffer_allocated
+
+
+  ASSERT (N_int > 0)
+  ASSERT (N_det > 0)
+
+  allocate ( buffer_det(N_int,2,N_det), buffer_r_coef(N_det,N_states), buffer_l_coef(N_det,N_states)  )
+
+  ! Backup determinants
+  j=0
+  do i=1,N_det
+!    if (pruned(i)) cycle  ! Pruned determinants
+    j+=1
+    ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num)
+    buffer_det(:,:,j) = psi_det(:,:,i)
+  enddo
+  N_det_old = j
+
+  ! Backup coefficients (right and left)
+  do k=1,N_states
+    j=0
+    do i=1,N_det
+!      if (pruned(i)) cycle  ! Pruned determinants
+      j += 1
+      buffer_r_coef(j,k) = psi_r_coef_bi_ortho(i,k)
+      buffer_l_coef(j,k) = psi_l_coef_bi_ortho(i,k)
+    enddo
+    ASSERT ( j == N_det_old )
+  enddo
+
+  ! Update N_det : old determinants + content of the nproc buffers
+  N_det = N_det_old
+  do j=0,nproc-1
+    N_det = N_det + H_apply_buffer(j)%N_det
+  enddo
+
+  ! Update array sizes
+  if (psi_det_size < N_det) then
+    psi_det_size = N_det
+    TOUCH psi_det_size
+  endif
+
+  ! Restore backup in resized array
+  do i=1,N_det_old
+    psi_det(:,:,i) = buffer_det(:,:,i)
+    ASSERT (sum(popcnt(psi_det(:,1,i))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i))) == elec_beta_num )
+  enddo
+  do k=1,N_states
+    do i=1,N_det_old
+      psi_r_coef_bi_ortho(i,k) = buffer_r_coef(i,k)
+      psi_l_coef_bi_ortho(i,k) = buffer_l_coef(i,k)
+    enddo
+  enddo
+
+  ! Copy new buffers
+  ! Each thread j copies H_apply_buffer(j). N_det_old is FIRSTPRIVATE : every
+  ! thread turns its own copy into the offset at which its buffer starts.
+  ! NOTE(review): only buffers whose index equals a running thread number are
+  ! copied; presumably the region runs with nproc threads -- confirm.
+
+  !$OMP PARALLEL DEFAULT(SHARED)                                     &
+      !$OMP PRIVATE(j,k,i) FIRSTPRIVATE(N_det_old)                   &
+      !$OMP SHARED(N_int,H_apply_buffer,psi_det,psi_r_coef_bi_ortho,psi_l_coef_bi_ortho,N_states,psi_det_size)
+  j=0
+  !$ j=omp_get_thread_num()
+  do k=0,j-1
+    N_det_old += H_apply_buffer(k)%N_det
+  enddo
+  do i=1,H_apply_buffer(j)%N_det
+    do k=1,N_int
+      psi_det(k,1,i+N_det_old) = H_apply_buffer(j)%det(k,1,i)
+      psi_det(k,2,i+N_det_old) = H_apply_buffer(j)%det(k,2,i)
+    enddo
+    ASSERT (sum(popcnt(psi_det(:,1,i+N_det_old))) == elec_alpha_num)
+    ASSERT (sum(popcnt(psi_det(:,2,i+N_det_old))) == elec_beta_num )
+  enddo
+  ! New determinants : right coefficient from the buffer, left coefficient zero
+  do k=1,N_states
+    do i=1,H_apply_buffer(j)%N_det
+      psi_r_coef_bi_ortho(i+N_det_old,k) = H_apply_buffer(j)%coef(i,k)
+      psi_l_coef_bi_ortho(i+N_det_old,k) = 0.d0
+    enddo
+  enddo
+  ! The barrier makes sure every thread has read the buffer sizes (offset
+  ! computation above) before any of them is reset to zero
+  !$OMP BARRIER
+  H_apply_buffer(j)%N_det = 0
+  !$OMP END PARALLEL
+  SOFT_TOUCH N_det psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho 
+
+  ! Remove duplicated determinants, then bi-normalize left and right coefficients
+  logical :: found_duplicates
+  call remove_duplicates_in_psi_det_tc(found_duplicates)
+  call bi_normalize(psi_l_coef_bi_ortho,psi_r_coef_bi_ortho,N_det,size(psi_l_coef_bi_ortho,1),N_states)
+  SOFT_TOUCH N_det psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho
+
+end
+
+subroutine remove_duplicates_in_psi_det_tc(found_duplicates)
+  implicit none
+  logical, intent(out) :: found_duplicates
+  BEGIN_DOC
+! Removes duplicate determinants in the wave function.
+!
+! Duplicates are searched in psi_det_sorted_bit_tc among determinants that
+! share the same det_search_key. found_duplicates is .True. on exit if at
+! least one duplicate was detected. In every case psi_det and the left/right
+! coefficients are finally overwritten with psi_det_sorted_tc,
+! psi_l_coef_sorted_bi_ortho and psi_r_coef_sorted_bi_ortho.
+  END_DOC
+  integer                        :: i,j,k
+  integer(bit_kind), allocatable :: bit_tmp(:)
+  logical,allocatable            :: duplicate(:)
+  logical                        :: dup
+
+  allocate (duplicate(N_det), bit_tmp(N_det))
+
+  found_duplicates = .False.
+
+  !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,dup)
+
+  ! Search key of every sorted determinant; nothing is flagged yet
+  !$OMP DO
+  do i=1,N_det
+    integer, external            :: det_search_key
+    !$DIR FORCEINLINE
+    bit_tmp(i) = det_search_key(psi_det_sorted_bit_tc(1,1,i),N_int)
+    duplicate(i) = .False.
+  enddo
+  !$OMP END DO
+
+  ! For each determinant i not yet flagged, scan the following determinants
+  ! j as long as they have the same search key, and flag those whose alpha
+  ! and beta bit strings are identical to those of i
+  !$OMP DO schedule(dynamic,1024)
+  do i=1,N_det-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      ! j already flagged : skip it
+      if (duplicate(j)) then
+        j = j+1
+        if (j > N_det) then
+          exit
+        else
+          cycle
+        endif
+      endif
+      ! Full comparison of the two determinants
+      dup = .True.
+      do k=1,N_int
+        if ( (psi_det_sorted_bit_tc(k,1,i) /= psi_det_sorted_bit_tc(k,1,j) ) &
+        .or. (psi_det_sorted_bit_tc(k,2,i) /= psi_det_sorted_bit_tc(k,2,j) ) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+        duplicate(j) = .True.
+        found_duplicates = .True.
+      endif
+      j += 1
+      if (j > N_det) then
+        exit
+      endif
+    enddo
+  enddo
+  !$OMP END DO
+  !$OMP END PARALLEL
+
+  if (found_duplicates) then
+    ! Compact the non-duplicated determinants at the beginning of psi_det
+    k=0
+    do i=1,N_det
+      if (.not.duplicate(i)) then
+        k += 1
+        psi_det(:,:,k) = psi_det_sorted_bit_tc (:,:,i)
+        psi_r_coef_bi_ortho(k,:)  = psi_r_coef_sorted_bit(i,:)
+        psi_l_coef_bi_ortho(k,:)  = psi_l_coef_sorted_bit(i,:)
+      else
+        ! A duplicate with non-zero right coefficients overwrites the
+        ! coefficients stored in slot k, i.e. those of the last kept
+        ! determinant (presumably the one it duplicates -- confirm)
+        if (sum(abs(psi_r_coef_sorted_bit(i,:))) /= 0.d0 ) then
+          psi_r_coef_bi_ortho(k,:)  = psi_r_coef_sorted_bit(i,:)
+          psi_l_coef_bi_ortho(k,:)  = psi_l_coef_sorted_bit(i,:)
+        endif
+      endif
+    enddo
+    ! k is the number of distinct determinants
+    N_det = k
+    psi_det_sorted_bit_tc(:,:,1:N_det) = psi_det(:,:,1:N_det)
+    psi_r_coef_sorted_bit(1:N_det,:) = psi_r_coef_bi_ortho(1:N_det,:)
+    psi_l_coef_sorted_bit(1:N_det,:) = psi_l_coef_bi_ortho(1:N_det,:)
+    TOUCH N_det psi_det psi_det_sorted_bit_tc c0_weight psi_r_coef_sorted_bit psi_l_coef_sorted_bit
+  endif
+  ! Executed whether or not duplicates were found : replace the wave function
+  ! by its psi_det_sorted_tc ordering
+  psi_det = psi_det_sorted_tc
+  psi_r_coef_bi_ortho = psi_r_coef_sorted_bi_ortho
+  psi_l_coef_bi_ortho = psi_l_coef_sorted_bi_ortho
+  SOFT_TOUCH psi_det psi_r_coef_bi_ortho psi_l_coef_bi_ortho psi_det_sorted_bit_tc psi_r_coef_sorted_bit psi_l_coef_sorted_bit
+  deallocate (duplicate,bit_tmp)
+end
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_bit_tc, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_r_coef_sorted_bit, (N_det,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_l_coef_sorted_bit, (N_det,N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! psi_det together with the right (psi_r_coef_bi_ortho) and left
+   ! (psi_l_coef_bi_ortho) coefficients, reordered by
+   ! sort_dets_by_det_search_key, i.e. sorted by determinants interpreted
+   ! as integers. Useful to accelerate the search of a random determinant
+   ! in the wave function.
+   !
+   ! The routine is called once per set of coefficients; both calls fill
+   ! psi_det_sorted_bit_tc.
+   END_DOC
+
+   ! Right coefficients
+   call sort_dets_by_det_search_key(                                 &
+       N_det, psi_det,                                               &
+       psi_r_coef_bi_ortho, size(psi_r_coef_bi_ortho,1),             &
+       psi_det_sorted_bit_tc, psi_r_coef_sorted_bit, N_states)
+
+   ! Left coefficients
+   call sort_dets_by_det_search_key(                                 &
+       N_det, psi_det,                                               &
+       psi_l_coef_bi_ortho, size(psi_l_coef_bi_ortho,1),             &
+       psi_det_sorted_bit_tc, psi_l_coef_sorted_bit, N_states)
+
+END_PROVIDER
--- a/src/fci_tc_bi/diagonalize_ci.irp.f
+++ b/src/fci_tc_bi/diagonalize_ci.irp.f
@ -0,0 +1,100 @@
+
+subroutine diagonalize_CI_tc_bi_ortho(ndet, E_tc,norm,pt2_data,print_pt2)
+  use selection_types
+  implicit none
+  integer, intent(inout)              :: ndet      ! number of determinants from before 
+  double precision, intent(inout)     :: E_tc,norm ! E and norm from previous wave function 
+  type(pt2_type)  , intent(in)        :: pt2_data  ! PT2 from previous wave function 
+  logical, intent(in) :: print_pt2
+  BEGIN_DOC
+!  Replace the coefficients of the CI states by the coefficients of the
+!  eigenstates of the CI matrix
+!
+!  On entry ndet, E_tc and norm describe the previous wave function; they are
+!  printed together with the PT2 data when print_pt2 is true. On exit they
+!  hold N_det, eigval_right_tc_bi_orth(1) and norm_ground_left_right_bi_orth.
+!  The left/right coefficients are replaced by leigvec_tc_bi_orth and
+!  reigvec_tc_bi_orth, psi_coef by |left * right|, and the wave function is
+!  saved with save_tc_bi_ortho_wavefunction.
+  END_DOC
+  integer :: i,j
+  double precision :: pt2_tmp,pt1_norm,rpt2_tmp,abs_pt2
+
+  ! Only the first state of the PT2 data is used
+  pt2_tmp  = pt2_data % pt2(1)
+  abs_pt2  = pt2_data % variance(1)
+  pt1_norm = pt2_data % overlap(1,1)
+  rpt2_tmp = pt2_tmp/(1.d0 + pt1_norm)
+
+  print*,'*****'
+  print*,'New wave function information'
+  print*,'N_det tc               = ',N_det
+  print*,'norm_ground_left_right_bi_orth = ',norm_ground_left_right_bi_orth
+  print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1)
+  print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1)
+  print*,'*****'
+  if(print_pt2)then
+   print*,'*****'
+   print*,'previous wave function info'
+   print*,'norm(before)      = ',norm
+   print*,'E(before)         = ',E_tc 
+   print*,'PT1 norm          = ',dsqrt(pt1_norm)
+   print*,'PT2               = ',pt2_tmp
+   print*,'rPT2              = ',rpt2_tmp
+   print*,'|PT2|             = ',abs_pt2
+   print*,'Positive PT2      = ',(pt2_tmp + abs_pt2)*0.5d0
+   print*,'Negative PT2      = ',(pt2_tmp - abs_pt2)*0.5d0
+   print*,'E(before) + PT2   = ',E_tc + pt2_tmp/norm
+   print*,'E(before) +rPT2   = ',E_tc + rpt2_tmp/norm
+   ! The X edit descriptor requires an explicit count in standard Fortran
+   write(*,'(A28,1X,I10,1X,100(F16.8,1X))')'Ndet,E,E+PT2,E+RPT2,|PT2|=',ndet,E_tc ,E_tc  + pt2_tmp/norm,E_tc  + rpt2_tmp/norm,abs_pt2
+   print*,'*****'
+  endif
+
+  ! From here on, the arguments describe the new wave function
+  E_tc  = eigval_right_tc_bi_orth(1)
+  norm  = norm_ground_left_right_bi_orth
+  ndet  = N_det
+  do j=1,N_states
+    do i=1,N_det
+      psi_l_coef_bi_ortho(i,j) = leigvec_tc_bi_orth(i,j)
+      psi_r_coef_bi_ortho(i,j) = reigvec_tc_bi_orth(i,j)
+      psi_coef(i,j) = dabs(psi_l_coef_bi_ortho(i,j) * psi_r_coef_bi_ortho(i,j))   
+    enddo
+  enddo
+  SOFT_TOUCH  eigval_left_tc_bi_orth  eigval_right_tc_bi_orth  leigvec_tc_bi_orth  reigvec_tc_bi_orth norm_ground_left_right_bi_orth psi_coef  psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+
+
+
+  call save_tc_bi_ortho_wavefunction
+end
+
+subroutine print_CI_dressed(ndet, E_tc,norm,pt2_data,print_pt2)
+  use selection_types
+  implicit none
+  integer, intent(inout)              :: ndet      ! number of determinants from before 
+  double precision, intent(inout)     :: E_tc,norm ! E and norm from previous wave function 
+  type(pt2_type)  , intent(in)        :: pt2_data  ! PT2 from previous wave function 
+  logical, intent(in) :: print_pt2
+  BEGIN_DOC
+!  Prints the information on the new wave function and, when print_pt2 is
+!  true, on the previous one (norm, energy, PT1 norm, PT2 of the first state).
+!
+!  On exit ndet, E_tc and norm hold N_det, eigval_right_tc_bi_orth(1) and
+!  norm_ground_left_right_bi_orth, and psi_coef is replaced by the right
+!  eigenvectors reigvec_tc_bi_orth.
+  END_DOC
+  integer          :: istate
+  double precision :: pt2_first, pt1_amplitude, e_plus_pt2
+
+  print*,'*****'
+  print*,'New wave function information'
+  print*,'N_det tc               = ',N_det
+  print*,'norm_ground_left_right_bi_orth = ',norm_ground_left_right_bi_orth
+  print*,'eigval_right_tc = ',eigval_right_tc_bi_orth(1)
+  print*,'Ndet, E_tc = ',N_det,eigval_right_tc_bi_orth(1)
+  print*,'*****'
+
+  if(print_pt2)then
+   ! Quantities of the first state, evaluated only when they are printed
+   pt2_first     = pt2_data % pt2(1)
+   pt1_amplitude = dsqrt(pt2_data % overlap(1,1))
+   e_plus_pt2    = E_tc + pt2_first/norm
+
+   print*,'*****'
+   print*,'previous wave function info'
+   print*,'norm(before)      = ',norm
+   print*,'E(before)         = ',E_tc
+   print*,'PT1 norm          = ',pt1_amplitude
+   print*,'E(before) + PT2   = ',e_plus_pt2
+   print*,'PT2               = ',pt2_first
+   print*,'Ndet, E_tc, E+PT2 = ',ndet,E_tc,e_plus_pt2,pt1_amplitude
+   print*,'*****'
+  endif
+
+  ! The arguments now describe the new wave function
+  ndet  = N_det
+  norm  = norm_ground_left_right_bi_orth
+  E_tc  = eigval_right_tc_bi_orth(1)
+
+  do istate = 1, N_states
+    psi_coef(1:N_det,istate) = reigvec_tc_bi_orth(1:N_det,istate)
+  enddo
+  SOFT_TOUCH  eigval_left_tc_bi_orth  eigval_right_tc_bi_orth  leigvec_tc_bi_orth  norm_ground_left_right_bi_orth  psi_coef  reigvec_tc_bi_orth 
+
+end
+
--- a/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
+++ b/src/fci_tc_bi/fci_tc_bi_ortho.irp.f
@ -0,0 +1,85 @@
+program fci
+  implicit none
+  BEGIN_DOC
+  ! Selected Full Configuration Interaction with stochastic selection
+  ! and PT2.
+  !
+  ! This program performs a |CIPSI|-like selected |CI| using a
+  ! stochastic scheme for both the selection of the important Slater
+  ! determinants and the computation of the |PT2| correction. This
+  ! |CIPSI|-like algorithm will be performed for the lowest states of
+  ! the variational space (see :option:`determinants n_states`). The
+  ! |FCI| program will stop when reaching at least one of the two following
+  ! conditions:
+  !
+  ! * number of Slater determinants > :option:`determinants n_det_max`
+  ! * abs(|PT2|) less than :option:`perturbation pt2_max`
+  !
+  ! The following other options can be of interest:
+  !
+  ! :option:`determinants read_wf`
+  !   When set to |false|, the program starts with a ROHF-like Slater
+  !   determinant as a guess wave function. When set to |true|, the
+  !   program starts with the wave function(s) stored in the |EZFIO|
+  !   directory as guess wave function(s).
+  !
+  ! :option:`determinants s2_eig`
+  !   When set to |true|, the selection will systematically add all the
+  !   necessary Slater determinants in order to have a pure spin wave
+  !   function with an |S^2| value corresponding to
+  !   :option:`determinants expected_s2`.
+  !
+  ! For excited states calculations, it is recommended to start with
+  ! :ref:`cis` or :ref:`cisd` guess wave functions, eventually in
+  ! a restricted set of |MOs|, and to set :option:`determinants s2_eig`
+  ! to |true|.
+  !
+  END_DOC
+
+
+  ! Override the grid parameters (30 radial and 50 angular points) and
+  ! notify IRPF90 so that dependent quantities are rebuilt
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid 
+  ! NOTE(review): a negative value presumably disables pruning -- confirm
+  pruning = -1.d0
+  touch pruning
+!  pt2_relative_error = 0.01d0
+!  touch pt2_relative_error
+  call run_cipsi_tc
+
+end
+
+
+subroutine run_cipsi_tc
+
+  BEGIN_DOC
+  ! Driver of the calculation.
+  !
+  ! When is_zmq_slave is false : provides the wave function and the one- and
+  ! two-electron bi-orthogonal TC integrals, then calls run_stochastic_cipsi
+  ! if do_pt2 is true and run_cipsi otherwise.
+  !
+  ! When is_zmq_slave is true : provides the integrals and calls run_slave_cipsi.
+  !
+  ! In both cases provide_all_three_ints_bi_ortho is called when there are
+  ! at least 3 electrons and three_body_h_tc is true.
+  END_DOC
+
+  implicit none
+
+  if (.not.is_zmq_slave) then
+    PROVIDE psi_det psi_coef mo_bi_ortho_tc_two_e mo_bi_ortho_tc_one_e
+    ! Three-body integrals are only requested with 3 electrons or more
+    if(elec_alpha_num+elec_beta_num.ge.3)then
+      if(three_body_h_tc)then
+        call provide_all_three_ints_bi_ortho
+      endif
+    endif
+    ! ---
+
+    if (do_pt2) then
+      call run_stochastic_cipsi
+    else
+      call run_cipsi
+    endif
+
+  else
+    PROVIDE mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e pt2_min_parallel_tasks
+    ! Same three-body condition as on the non-slave side
+    if(elec_alpha_num+elec_beta_num.ge.3)then
+      if(three_body_h_tc)then
+        call provide_all_three_ints_bi_ortho
+      endif
+    endif
+    ! ---
+
+    call run_slave_cipsi
+
+  endif
+
+end
--- a/src/fci_tc_bi/generators.irp.f
+++ b/src/fci_tc_bi/generators.irp.f
@ -0,0 +1,51 @@
+use bitmasks
+
+BEGIN_PROVIDER [ integer, N_det_generators ]
+ implicit none
+ BEGIN_DOC
+ ! Number of generator determinants. Starting from 1.d0, the entries of
+ ! psi_average_norm_contrib_sorted_tc are subtracted one by one; the count
+ ! stops at the first determinant for which the remainder (minus a 1.d-10
+ ! tolerance) drops below 1.d0 - threshold_generators. If this never
+ ! happens all N_det determinants are generators. The result is at least 1.
+ END_DOC
+ integer :: idet
+ double precision :: remaining
+ call write_time(6)
+ N_det_generators = N_det
+ remaining = 1.d0
+ do idet = 1, N_det
+   remaining = remaining - psi_average_norm_contrib_sorted_tc(idet)
+   if (remaining - 1.d-10 < 1.d0 - threshold_generators) then
+     N_det_generators = idet
+     exit
+   endif
+ enddo
+ if (N_det_generators < 1) then
+   N_det_generators = 1
+ endif
+ call write_int(6,N_det_generators,'Number of generators')
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_generators, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_generators, (psi_det_size,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Generator determinants and their coefficients: copies of the first N_det
+ ! entries of psi_det_sorted_tc and psi_coef_sorted_tc. Entries beyond N_det
+ ! are not assigned here.
+ END_DOC
+ integer :: idet, ispin, iword, istate
+
+ do idet = 1, N_det
+   do ispin = 1, 2
+     do iword = 1, N_int
+       psi_det_generators(iword,ispin,idet) = psi_det_sorted_tc(iword,ispin,idet)
+     enddo
+   enddo
+ enddo
+
+ do istate = 1, N_states
+   do idet = 1, N_det
+     psi_coef_generators(idet,istate) = psi_coef_sorted_tc(idet,istate)
+   enddo
+ enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_gen, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_gen, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ integer, psi_det_sorted_tc_gen_order,     (psi_det_size)  ]
+
+ implicit none
+ BEGIN_DOC
+ ! Sorted determinants, coefficients and ordering used for the generators.
+ ! Here they are plain whole-array copies of psi_det_sorted_tc,
+ ! psi_coef_sorted_tc and psi_det_sorted_tc_order.
+ END_DOC
+ ! NOTE(review): whole-array assignments; this assumes the sources have the
+ ! same shapes as the arrays declared above -- confirm against their providers.
+ psi_det_sorted_tc_gen = psi_det_sorted_tc
+ psi_coef_sorted_tc_gen = psi_coef_sorted_tc
+ psi_det_sorted_tc_gen_order = psi_det_sorted_tc_order
+END_PROVIDER
+
+
--- a/src/fci_tc_bi/save_energy.irp.f
+++ b/src/fci_tc_bi/save_energy.irp.f
@ -0,0 +1,9 @@
+subroutine save_energy(E,pt2)
+  implicit none
+  BEGIN_DOC
+! Stores in |EZFIO|, for the N_states states, the energies E (fci_tc energy)
+! and the sums E+pt2 (fci_tc energy_pt2).
+  END_DOC
+  double precision, intent(in) :: E(N_states), pt2(N_states)
+  double precision             :: e_plus_pt2(N_states)
+  integer                      :: istate
+
+  do istate = 1, N_states
+    e_plus_pt2(istate) = E(istate) + pt2(istate)
+  enddo
+
+  call ezfio_set_fci_tc_energy(E)
+  call ezfio_set_fci_tc_energy_pt2(e_plus_pt2)
+end
--- a/src/fci_tc_bi/scripts_fci_tc/CH2.xyz
+++ b/src/fci_tc_bi/scripts_fci_tc/CH2.xyz
@ -0,0 +1,6 @@
+3
+
+C                    6.000000     0.000000     0.000000     0.173480    
+H                    1.000000     0.000000    -0.861500    -0.520430
+H                    1.000000     0.000000     0.861500    -0.520430
+
--- a/src/fci_tc_bi/scripts_fci_tc/FH.xyz
+++ b/src/fci_tc_bi/scripts_fci_tc/FH.xyz
@ -0,0 +1,5 @@
+2 
+
+H                    0.000000     0.000000    -0.825120
+F                    0.000000     0.000000     0.091680
+
--- a/src/fci_tc_bi/scripts_fci_tc/extract_tables.sh
+++ b/src/fci_tc_bi/scripts_fci_tc/extract_tables.sh
@ -0,0 +1,16 @@
+
+# Extract the energy tables printed by the two tc_fci runs into data files.
+input=h2o
+basis=dz
+EZFIO="${input}_${basis}_bi_ortho"
+
+# Run without normal ordering: keep the second "="-separated field of each match
+file="${EZFIO}.tc_fci.out"
+grep "Ndet,E,E+PT2,E+RPT2,|PT2|=" "${file}" \
+  | cut -d "=" -f 2 > "data_${EZFIO}"
+
+# Run with normal ordering
+file="${EZFIO}.tc_fci_normal_order.out"
+grep "Ndet,E,E+PT2,E+RPT2=" "${file}" \
+  | cut -d "=" -f 2 > "data_${EZFIO}_normal"
+
+# Disabled: same extraction for the "_ortho" EZFIO directory
+#EZFIO=${input}_${basis}_ortho
+#file=${EZFIO}.tc_fci.out 
+#grep "Ndet, E_tc, E+PT2 =" ${file} | cut -d "=" -f 2 > data_${EZFIO}
+#file=${EZFIO}.tc_fci_normal_order.out
+#grep "Ndet, E_tc, E+PT2 =" ${file} | cut -d "=" -f 2 > data_${EZFIO}_normal
+
+#zip data_${input}_${basis}.zip data*
--- a/src/fci_tc_bi/scripts_fci_tc/h2o.sh
+++ b/src/fci_tc_bi/scripts_fci_tc/h2o.sh
@ -0,0 +1,41 @@
+#!/bin/bash
+# This is a sample PBS script
+# CPU time: adjust to the calculation
+   #PBS -l cput=2000:00:00
+   #PBS -l nodes=1:ppn=16
+# memory: adjust to the calculation
+   #PBS -l vmem=100gb
+# to be changed
+# To find out which node the job runs on
+#echo $HOSTNAME
+# StartDir = where the input files are, HOMEDIR by default
+#
+StartDir=$PBS_O_WORKDIR
+echo $StartDir
+#
+# SCRATCHDIR = temporary space (local to the node, to be emptied after the calculation)
+# DO NOT MODIFY
+ulimit -s unlimited
+export SCRATCHDIR=/scratch/$USER/$PBS_JOBID
+#
+cd $StartDir
+
+
+############################################################################
+#### SUBMISSION SCRIPT: SETS UP THE ENVIRONMENT AND RUNS ./script.sh
+#### WITH THE ARGUMENTS  h2o dz O 1  (SEE THE LAST LINE)
+
+####### PATH TO YOUR QUANTUM PACKAGE INSTALLATION
+QP_ROOT=/home_lct/eginer/programs/qp2
+source ${QP_ROOT}/quantum_package.rc 
+####### LOAD SOME LIBRARIES
+# NOTE(review): bash does not expand aliases in non-interactive scripts unless
+# expand_aliases is set, so this alias may have no effect here -- confirm.
+alias python3='/programmes/installation/Python/3.7.1/bin/python3'
+type -a python3
+
+export OMP_NUM_THREADS=16
+
+module load intel2016_OMPI-V2
+
+# NOTE(review): quantum_package.rc is sourced a second time, from a path that
+# may differ from ${QP_ROOT} above -- confirm which installation is intended.
+source ~/programs/qp2/quantum_package.rc
+./script.sh h2o dz O 1
--- a/src/fci_tc_bi/scripts_fci_tc/h2o.xyz
+++ b/src/fci_tc_bi/scripts_fci_tc/h2o.xyz
@ -0,0 +1,6 @@
+3
+
+O      0.000000     0.000000     0.000000
+H      0.000000     0.000000     0.957200
+H     -0.926627     0.000000    -0.239987
+
--- a/src/fci_tc_bi/scripts_fci_tc/script.sh
+++ b/src/fci_tc_bi/scripts_fci_tc/script.sh
@ -0,0 +1,31 @@
+source /home_lct/eginer/qp2/quantum_package.rc
+input=$1
+ basis=$2
+ atom=$3
+ mul=$4
+ EXPORT_OMP_NUM_THREADS=16
+ dir=${input}_${basis}
+ mkdir ${dir}
+ cp ${input}.xyz ${dir}/
+ cd $dir
+ EZFIO=${input}_${basis}_bi_ortho
+ qp create_ezfio -b "${atom}:cc-pcv${basis}|H:cc-pv${basis}" ${input}.xyz -m $mul -o $EZFIO
+ qp run scf
+ # Getting THE GOOD VALUE OF MU
+ qp run print_mu_av_tc  | tee ${EZFIO_FILE}.mu_av.out
+ mu=`grep "average_mu_rs_c_lda  =" ${EZFIO_FILE}.mu_av.out | cut -d "=" -f 2`
+ qp set ao_two_e_erf_ints mu_erf $mu
+ # Carrying the BI-ORTHO TC-SCF 
+ qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out
+ #   Three body terms without normal order
+ ### THREE E TERMS FOR FCI
+ qp set tc_keywords three_body_h_tc True  
+ qp set tc_keywords double_normal_ord False
+ qp set perturbation pt2_max 0.003
+ qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.tc_fci.out
+ #   Three body terms with normal order
+ qp set tc_keywords double_normal_ord True
+ qp run fci_tc_bi_ortho | tee ${EZFIO_FILE}.tc_fci_normal_order.out
+
+cd ../
+
--- a/src/fci_tc_bi/selectors.irp.f
+++ b/src/fci_tc_bi/selectors.irp.f
@ -0,0 +1,100 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, threshold_selectors ]
+ implicit none
+ BEGIN_DOC
+ ! Threshold used to count the selectors: the square root of
+ ! threshold_generators.
+ END_DOC
+ threshold_selectors = sqrt(threshold_generators)
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, N_det_selectors]
+  implicit none
+  BEGIN_DOC
+  ! Number of selector determinants. Starting from 1.d0, the sorted norm
+  ! contributions psi_average_norm_contrib_sorted_tc are subtracted one by
+  ! one; the count stops at the first determinant for which the remainder
+  ! (minus a 1.d-10 tolerance) drops below 1.d0 - threshold_selectors.
+  ! The result is never smaller than N_det_generators.
+  END_DOC
+  integer                        :: i
+  double precision               :: norm
+  call write_time(6)
+  N_det_selectors = N_det
+  norm = 1.d0
+  do i=1,N_det
+    ! The selectors are the leading entries of the sorted determinant list, so
+    ! the truncation must accumulate the contributions in that same order,
+    ! exactly as N_det_generators does.
+    norm = norm - psi_average_norm_contrib_sorted_tc(i)
+    if (norm - 1.d-10 < 1.d0 - threshold_selectors) then
+      N_det_selectors = i
+      exit
+    endif
+  enddo
+  N_det_selectors = max(N_det_selectors,N_det_generators)
+  call write_int(6,N_det_selectors,'Number of selectors')
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_tc, (psi_selectors_size,2,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Selector determinants (first N_det_selectors entries of psi_det_sorted_tc)
+  ! and their coefficients. psi_selectors_coef_tc(:,1,:) holds the left
+  ! coefficients and psi_selectors_coef_tc(:,2,:) the right ones.
+  ! Entries beyond N_det_selectors are not assigned here.
+  END_DOC
+  integer                        :: idet, istate
+
+  do idet = 1, N_det_selectors
+    psi_selectors(1:N_int,1,idet) = psi_det_sorted_tc(1:N_int,1,idet)
+    psi_selectors(1:N_int,2,idet) = psi_det_sorted_tc(1:N_int,2,idet)
+  enddo
+
+  do istate = 1, N_states
+    psi_selectors_coef(1:N_det_selectors,istate) = psi_coef_sorted_tc_gen(1:N_det_selectors,istate)
+    psi_selectors_coef_tc(1:N_det_selectors,1,istate) = psi_l_coef_sorted_bi_ortho(1:N_det_selectors,istate)
+    psi_selectors_coef_tc(1:N_det_selectors,2,istate) = psi_r_coef_sorted_bi_ortho(1:N_det_selectors,istate)
+  enddo
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp_tc, (N_states,2,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Selector coefficients with the state index first: transposes of
+  ! psi_selectors_coef and psi_selectors_coef_tc over the first
+  ! N_det_selectors determinants.
+  END_DOC
+  integer                        :: idet, istate
+
+  do istate = 1, N_states
+    do idet = 1, N_det_selectors
+      psi_selectors_coef_transp(istate,idet)      = psi_selectors_coef(idet,istate)
+      psi_selectors_coef_transp_tc(istate,1,idet) = psi_selectors_coef_tc(idet,1,istate)
+      psi_selectors_coef_transp_tc(istate,2,idet) = psi_selectors_coef_tc(idet,2,istate)
+    enddo
+  enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ double precision, psi_selectors_rcoef_bi_orth_transp, (N_states, psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_lcoef_bi_orth_transp, (N_states, psi_det_size) ]
+
+  BEGIN_DOC
+  ! Right and left sorted bi-ortho coefficients of the selectors with the
+  ! state index first. Columns beyond N_det_selectors are zero.
+  END_DOC
+
+  implicit none
+  integer :: idet, istate
+
+  psi_selectors_rcoef_bi_orth_transp(:,:) = 0.d0
+  psi_selectors_lcoef_bi_orth_transp(:,:) = 0.d0
+
+  print*,'N_det,N_det_selectors',N_det,N_det_selectors
+
+  do istate = 1, N_states
+    do idet = 1, N_det_selectors
+      psi_selectors_rcoef_bi_orth_transp(istate,idet) = psi_r_coef_sorted_bi_ortho(idet,istate)
+      psi_selectors_lcoef_bi_orth_transp(istate,idet) = psi_l_coef_sorted_bi_ortho(idet,istate)
+    enddo
+  enddo
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, psi_selectors_size ]
+ implicit none
+ ! Dimension used for the selector arrays: identical to psi_det_size.
+ psi_selectors_size = psi_det_size
+END_PROVIDER
+
--- a/src/fci_tc_bi/zmq.irp.f
+++ b/src/fci_tc_bi/zmq.irp.f
@ -0,0 +1,103 @@
+BEGIN_TEMPLATE
+
+integer function zmq_put_$X(zmq_to_qp_run_socket,worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put $X on the qp_run scheduler
+!
+! Sends a 'put_data' header followed by the 4 bytes of $X, then waits for
+! the reply. Returns 0 on success and -1 if a send is incomplete or the reply
+! is not 'put_data_reply ok'.
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  zmq_put_$X = 0
+
+  write(msg,'(A,1X,I8,1X,A200)') 'put_data '//trim(zmq_state), worker_id, '$X'
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    zmq_put_$X = -1
+    return
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,$X,4,0)
+  if (rc /= 4) then
+    zmq_put_$X = -1
+    return
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_data_reply ok') then
+    zmq_put_$X = -1
+    return
+  endif
+
+end
+
+integer function zmq_get_$X(zmq_to_qp_run_socket, worker_id)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get $X from the qp_run scheduler
+!
+! Only the MPI master talks to the scheduler. With MPI, both the return
+! status and $X are then broadcast from rank 0. Returns 0 on success and
+! -1 on a failed exchange.
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer                        :: rc
+  character*(256)                :: msg
+
+  PROVIDE zmq_state
+  zmq_get_$X = 0
+  if (mpi_master) then
+
+    write(msg,'(A,1X,I8,1X,A200)') 'get_data '//trim(zmq_state), worker_id, '$X'
+    rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+    if (rc /= len(trim(msg))) then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+    rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+    if (msg(1:14) /= 'get_data_reply') then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+    rc = f77_zmq_recv(zmq_to_qp_run_socket,$X,4,0)
+    if (rc /= 4) then
+      zmq_get_$X = -1
+      go to 10
+    endif
+
+  endif
+
+  ! Failures jump here so that every rank still reaches the broadcasts below
+  10 continue
+
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+
+    ! The messages name the quantity actually being broadcast, so that the
+    ! N_det_selectors instance does not report N_det_generators.
+    call MPI_BCAST (zmq_get_$X, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      print *,  irp_here//': Unable to broadcast zmq_get_$X'
+      stop -1
+    endif
+    call MPI_BCAST ($X, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      print *,  irp_here//': Unable to broadcast $X'
+      stop -1
+    endif
+  IRP_ENDIF
+
+end
+
+SUBST [ X ]
+
+N_det_generators ;;
+N_det_selectors ;;
+
+END_TEMPLATE
+
--- a/src/iterations_tc/EZFIO.cfg
+++ b/src/iterations_tc/EZFIO.cfg
@ -0,0 +1,24 @@
+[n_iter]
+interface: ezfio
+doc: Number of saved iterations
+type:integer
+default: 1
+
+[n_det_iterations]
+interface: ezfio, provider
+doc: Number of determinants at each iteration
+type: integer
+size: (100)
+ 
+[energy_iterations]
+interface: ezfio, provider
+doc: The variational energy at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
+[pt2_iterations]
+interface: ezfio, provider
+doc: The |PT2| correction at each iteration
+type: double precision 
+size: (determinants.n_states,100)
+
--- a/src/iterations_tc/NEED
+++ b/src/iterations_tc/NEED
--- a/src/iterations_tc/io.irp.f
+++ b/src/iterations_tc/io.irp.f
@ -0,0 +1,37 @@
+BEGIN_PROVIDER [ integer, n_iter  ]
+  implicit none
+  BEGIN_DOC
+! number of iterations
+!
+! On the MPI master the iteration history stored in |EZFIO| is reset
+! (n_iter to 0, energies, PT2 and numbers of determinants to zero) and
+! n_iter is set to 1. With MPI the value is broadcast from rank 0.
+  END_DOC
+
+  ! NOTE(review): `has` is declared but never used in this provider.
+  logical                        :: has
+  PROVIDE ezfio_filename
+  if (mpi_master) then
+
+      ! Zero-filled buffers used to wipe the 100-slot history arrays
+      double precision :: zeros(N_states,100)
+      integer :: izeros(100)
+      zeros = 0.d0
+      izeros = 0
+      ! NOTE(review): the setters are named ezfio_set_iterations_* while this
+      ! module is iterations_tc -- confirm they address the intended EZFIO group.
+      call ezfio_set_iterations_n_iter(0)
+      call ezfio_set_iterations_energy_iterations(zeros)
+      call ezfio_set_iterations_pt2_iterations(zeros)
+      call ezfio_set_iterations_n_det_iterations(izeros)
+      n_iter = 1
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    include 'mpif.h'
+    integer :: ierr
+    call MPI_BCAST( n_iter, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read n_iter with MPI'
+    endif
+  IRP_ENDIF
+
+  call write_time(6)
+
+END_PROVIDER
+
--- a/src/iterations_tc/iterations.irp.f
+++ b/src/iterations_tc/iterations.irp.f
@ -0,0 +1,43 @@
+BEGIN_PROVIDER [ double precision, extrapolated_energy, (N_iter,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! Extrapolated energy, using E_var = f(PT2) where PT2=0
+ !
+ ! In this module no extrapolation is performed: every entry is set to zero.
+ END_DOC
+ extrapolated_energy(:,:) = 0.d0
+END_PROVIDER 
+
+ subroutine get_extrapolated_energy(Niter,ept2,pt1,extrap_energy)
+ implicit none
+ BEGIN_DOC
+ ! Thin wrapper around extrapolate_data: forwards the Niter values of ept2
+ ! and pt1 and receives the result in extrap_energy.
+ END_DOC
+ integer, intent(in)  :: Niter
+ double precision, intent(in)  :: ept2(Niter),pt1(Niter)
+ ! extrap_energy is the result of the call below, so it must not be
+ ! intent(in). NOTE(review): assumes extrapolate_data writes its last
+ ! argument -- confirm against its definition.
+ double precision, intent(out) :: extrap_energy(Niter)
+ call extrapolate_data(Niter,ept2,pt1,extrap_energy)
+ end
+
+subroutine save_iterations(e_, pt2_,n_)
+  implicit none
+  BEGIN_DOC
+! Update the energy in the EZFIO file.
+!
+! Stores e_, pt2_ and n_ in slot N_iter of energy_iterations,
+! pt2_iterations and n_det_iterations, then writes N_iter and the three
+! history arrays to EZFIO. When N_iter has reached 101 the history is first
+! shifted down by one slot (dropping the oldest entry) and N_iter is set
+! back to 100.
+  END_DOC
+  integer, intent(in) :: n_
+  double precision, intent(in) :: e_(N_states), pt2_(N_states)
+  integer :: i
+
+  if (N_iter == 101) then
+    ! Move every stored iteration one slot towards the beginning. Only slots
+    ! 1..N_iter-1 are touched: slot N_iter lies outside the 100-slot history
+    ! and is filled below, once N_iter has been decremented.
+    do i=2,N_iter-1
+      energy_iterations(1:N_states,i-1) = energy_iterations(1:N_states,i)
+      pt2_iterations(1:N_states,i-1) = pt2_iterations(1:N_states,i)
+      n_det_iterations(i-1) = n_det_iterations(i)
+    enddo
+    N_iter = N_iter-1
+    TOUCH N_iter
+  endif
+
+  energy_iterations(1:N_states,N_iter) = e_(1:N_states)
+     pt2_iterations(1:N_states,N_iter) = pt2_(1:N_states)
+  n_det_iterations(N_iter) = n_
+  call ezfio_set_iterations_N_iter(N_iter)
+  call ezfio_set_iterations_energy_iterations(energy_iterations)
+  call ezfio_set_iterations_pt2_iterations(pt2_iterations)
+  call ezfio_set_iterations_n_det_iterations(n_det_iterations)
+end
+
--- a/src/iterations_tc/print_extrapolation.irp.f
+++ b/src/iterations_tc/print_extrapolation.irp.f
@ -0,0 +1,46 @@
+subroutine print_extrapolated_energy
+  implicit none
+  BEGIN_DOC
+! Writes to standard output, state by state, a table with one row per
+! k = 2 .. min(N_iter,8): pt2_iterations(state,N_iter+1-k) and
+! extrapolated_energy(k,state). For states after the first, the difference
+! to state 1 is added in a.u. and in eV. Nothing is written when N_iter < 2.
+  END_DOC
+
+  double precision, parameter :: ha_to_ev = 27.211396641308d0
+  integer                     :: istate, irow, last_row
+  double precision            :: gap
+
+  if (N_iter >= 2) then
+
+    last_row = min(N_iter,8)
+
+    write(*,'(A)') ''
+    write(*,'(A)') 'Extrapolated energies'
+    write(*,'(A)') '------------------------'
+    write(*,'(A)') ''
+
+    ! First state: no excitation columns
+    print *,  ''
+    print *,  'State ', 1
+    print *,  ''
+    write(*,*)  '=========== ', '==================='
+    write(*,*)  'minimum PT2 ', 'Extrapolated energy'
+    write(*,*)  '=========== ', '==================='
+    do irow = 2, last_row
+      write(*,'(F11.4,2X,F18.8)') pt2_iterations(1,N_iter+1-irow), extrapolated_energy(irow,1)
+    enddo
+    write(*,*)  '=========== ', '==================='
+
+    ! Remaining states: energies relative to state 1 are appended
+    do istate = 2, min(N_states,N_det)
+      print *,  ''
+      print *,  'State ', istate
+      print *,  ''
+      write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+      write(*,*)  'minimum PT2 ', 'Extrapolated energy ', '  Excitation (a.u)  ', '  Excitation (eV)  '
+      write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+      do irow = 2, last_row
+        gap = extrapolated_energy(irow,istate) - extrapolated_energy(irow,1)
+        write(*,'(F11.4,X,3(X,F18.8))') pt2_iterations(istate,N_iter+1-irow), &
+            extrapolated_energy(irow,istate), gap, gap * ha_to_ev
+      enddo
+      write(*,*)  '=========== ', '=================== ', '=================== ', '==================='
+    enddo
+
+    print *,  ''
+
+  endif
+
+end subroutine
+
--- a/src/iterations_tc/print_summary.irp.f
+++ b/src/iterations_tc/print_summary.irp.f
@ -0,0 +1,104 @@
+subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s2_)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Print a summary of the current iteration in the output: a table of the
+! energies, PT2 and rPT2 corrections (with their error estimates) for the
+! first min(n_det_,n_st) states, followed by per-state details and, when
+! n_st > 1, the energy differences with respect to the first state.
+!
+! e_               : energies, one per state
+! pt2_data         : PT2 quantities (pt2, rpt2, variance, overlap are read)
+! pt2_data_err     : corresponding error estimates
+! n_det_           : number of determinants
+! n_configuration_ : number of configurations (printed only if s2_eig)
+! n_st             : number of states
+! s2_              : < S^2 > values, one per state
+  END_DOC
+
+  integer, intent(in)            :: n_det_, n_configuration_, n_st
+  double precision, intent(in)   :: e_(n_st), s2_(n_st)
+  type(pt2_type)  , intent(in)   :: pt2_data, pt2_data_err
+  integer                        :: i, k
+  integer                        :: N_states_p
+  character*(9)                  :: pt2_string
+  character*(512)                :: fmt
+
+  ! Label the PT2 lines as approximate when do_pt2 is not set
+  if (do_pt2) then
+    pt2_string = '        '
+  else
+    pt2_string = '(approx)'
+  endif
+
+  ! Number of states actually printed
+  N_states_p = min(N_det_,n_st)
+
+  print *, ''
+  print '(A,I12)',  'Summary at N_det = ', N_det_
+  print '(A)',      '-----------------------------------'
+  print *, ''
+
+  ! The format strings are built at run time because the number of columns
+  ! depends on N_states_p
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(13X,', N_states_p, '(6X,A7,1X,I6,10X))'
+  write(*,fmt) ('State',k, k=1,N_states_p)
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  write(fmt,*) '(A13,', N_states_p, '(1X,F14.8,15X))'
+  write(*,fmt) '# E          ', e_(1:N_states_p)
+  if (N_states_p > 1) then
+    write(*,fmt) '# Excit. (au)', e_(1:N_states_p)-e_(1)
+    write(*,fmt) '# Excit. (eV)', (e_(1:N_states_p)-e_(1))*27.211396641308d0
+  endif
+  ! From here on each state takes two columns: value and error estimate
+  write(fmt,*) '(A13,', 2*N_states_p, '(1X,F14.8))'
+  write(*,fmt) '# PT2 '//pt2_string, (pt2_data % pt2(k), pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# rPT2'//pt2_string, (pt2_data % rpt2(k), pt2_data_err % rpt2(k), k=1,N_states_p)
+  write(*,'(A)') '#'
+  write(*,fmt) '# E+PT2      ', (e_(k)+pt2_data % pt2(k),pt2_data_err % pt2(k), k=1,N_states_p)
+  write(*,fmt) '# E+rPT2     ', (e_(k)+pt2_data % rpt2(k),pt2_data_err % rpt2(k), k=1,N_states_p)
+  if (N_states_p > 1) then
+    ! Error on a difference: square root of the sum of the squared errors
+    write(*,fmt) '# Excit. (au)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1)), &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1)), k=1,N_states_p)
+    write(*,fmt) '# Excit. (eV)', ( (e_(k)+pt2_data % pt2(k)-e_(1)-pt2_data % pt2(1))*27.211396641308d0, &
+      dsqrt(pt2_data_err % pt2(k)*pt2_data_err % pt2(k)+pt2_data_err % pt2(1)*pt2_data_err % pt2(1))*27.211396641308d0, k=1,N_states_p)
+  endif
+  write(fmt,*) '(''# ============'',', N_states_p, '(1X,''=============================''))'
+  write(*,fmt)
+  print *,  ''
+
+  print *,  'N_det             = ', N_det_
+  print *,  'N_states          = ', n_st
+  if (s2_eig) then
+    print *,  'N_cfg             = ', N_configuration_
+    if (only_expected_s2) then
+      print *,  'N_csf             = ', N_csf
+    endif
+  endif
+  print *,  ''
+
+  ! Per-state details
+  do k=1, N_states_p
+    print*,'* State ',k
+    print *,  '< S^2 >         = ', s2_(k)
+    print *,  'E               = ', e_(k)
+    print *,  'Variance        = ', pt2_data % variance(k), ' +/- ', pt2_data_err % variance(k)
+    print *,  'PT norm         = ', dsqrt(pt2_data % overlap(k,k)), ' +/- ', 0.5d0*dsqrt(pt2_data % overlap(k,k)) * pt2_data_err % overlap(k,k) / (pt2_data % overlap(k,k))
+    print *,  'PT2             = ', pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'rPT2            = ', pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  'E+PT2 '//pt2_string//' = ', e_(k)+pt2_data % pt2(k), ' +/- ', pt2_data_err % pt2(k)
+    print *,  'E+rPT2'//pt2_string//' = ', e_(k)+pt2_data % rpt2(k), ' +/- ', pt2_data_err % rpt2(k)
+    print *,  ''
+  enddo
+
+  print *,  '-----'
+  ! Energy differences with respect to the first state, in a.u. and eV
+  if(n_st.gt.1)then
+    print *, 'Variational Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i) - e_(1)), &
+        (e_(i) - e_(1)) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))), &
+        (e_(i)+ pt2_data % pt2(i) - (e_(1) + pt2_data % pt2(1))) * 27.211396641308d0
+    enddo
+    print *,  '-----'
+    print*, 'Variational + renormalized perturbative Energy difference (au | eV)'
+    do i=2, N_states_p
+      print*,'Delta E = ', (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))), &
+        (e_(i)+ pt2_data % rpt2(i) - (e_(1) + pt2_data % rpt2(1))) * 27.211396641308d0
+    enddo
+  endif
+
+!  call print_energy_components()
+
+end subroutine
+
--- a/src/ortho_three_e_ints/NEED
+++ b/src/ortho_three_e_ints/NEED
--- a/src/ortho_three_e_ints/io_6_index_tensor.irp.f
+++ b/src/ortho_three_e_ints/io_6_index_tensor.irp.f
--- a/src/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f
+++ b/src/ortho_three_e_ints/mu_j_ints_usual_mos.irp.f
@ -1,8 +1,29 @@
+subroutine give_integrals_3_body(i,j,m,k,l,n,integral)
+ implicit none
+ BEGIN_DOC
+! <ijm|L|kln>
+!
+! Accumulated over the grid points and the three components of
+! x_W_ij_erf_rk as the sum of three terms, each made of one product of two
+! orbitals at the grid point times two x_W_ij_erf_rk factors, weighted by
+! final_weight_at_r_vector.
+ END_DOC
+ integer, intent(in) :: i,j,m,k,l,n
+ double precision, intent(out) :: integral
+ integer :: ip, ixyz
+ double precision :: w, w_ik, w_jl, w_mn
+
+ integral = 0.d0
+ do ixyz = 1, 3
+  do ip = 1, n_points_final_grid
+   w    = final_weight_at_r_vector(ip)
+   w_ik = x_W_ij_erf_rk(ip,ixyz,i,k)
+   w_jl = x_W_ij_erf_rk(ip,ixyz,j,l)
+   w_mn = x_W_ij_erf_rk(ip,ixyz,m,n)
+   integral = integral + w * mos_in_r_array_transp(ip,i) * mos_in_r_array_transp(ip,k) * w_mn * w_jl
+   integral = integral + w * mos_in_r_array_transp(ip,j) * mos_in_r_array_transp(ip,l) * w_mn * w_ik
+   integral = integral + w * mos_in_r_array_transp(ip,m) * mos_in_r_array_transp(ip,n) * w_jl * w_ik
+  enddo
+ enddo
+end

 BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_num,n_points_final_grid)]
 implicit none
 BEGIN_DOC
 ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1 )/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 integer :: i,j,k,l,ipoint
 do ipoint = 1, n_points_final_grid
@ -23,6 +44,8 @@ BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,n_poi
 implicit none
 BEGIN_DOC
 ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 integer :: ipoint
 !$OMP PARALLEL                  &
@ -42,6 +65,8 @@ BEGIN_PROVIDER [ double precision, mo_v_ij_erf_rk_cst_mu_transp, ( n_points_fina
 implicit none
 BEGIN_DOC
 ! int dr phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/(2|r - R|) on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 integer :: ipoint,i,j
 do i = 1, mo_num
@ -59,6 +84,8 @@ BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu_naive, ( mo_num, mo_n
 implicit none
 BEGIN_DOC
 ! int dr  x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1 )/|r - R| on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 integer :: i,j,k,l,ipoint,m
 do ipoint = 1, n_points_final_grid
@ -81,6 +108,8 @@ BEGIN_PROVIDER [ double precision, mo_x_v_ij_erf_rk_cst_mu, ( mo_num, mo_num,3,n
 implicit none
 BEGIN_DOC
 ! int dr x * phi_i(r) phi_j(r) (erf(mu(R) |r - R|) - 1)/2|r - R| on the MO basis
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 integer :: ipoint,m
 !$OMP PARALLEL                  &
@ -119,6 +148,8 @@ BEGIN_PROVIDER [ double precision, x_W_ij_erf_rk, ( n_points_final_grid,3,mo_num
 implicit none
 BEGIN_DOC
 ! W_mn^X(R) = \int dr phi_m(r) phi_n(r) (1 - erf(mu |r-R|)) (x-X)
+!
+! WARNING: not on the BI-ORTHO MOs
 END_DOC
 include 'constants.include.F'
 integer :: ipoint,m,i,j
@ -160,48 +191,3 @@ BEGIN_PROVIDER [ double precision, sqrt_weight_at_r, (n_points_final_grid)]
 enddo
 END_PROVIDER 

-!BEGIN_PROVIDER [ double precision, mos_in_r_array_transp_sq_weight, (n_points_final_grid,mo_num)]
-
-
-!BEGIN_PROVIDER [ double precision, gauss_ij_rk_transp, (ao_num, ao_num, n_points_final_grid) ]
-! implicit none
-! integer :: i,j,ipoint
-! do ipoint = 1, n_points_final_grid
-!  do j = 1, ao_num
-!   do i = 1, ao_num
-!    gauss_ij_rk_transp(i,j,ipoint) = gauss_ij_rk(ipoint,i,j)
-!   enddo
-!  enddo
-! enddo
-!END_PROVIDER 
-!
-!
-!BEGIN_PROVIDER [ double precision, mo_gauss_ij_rk, ( mo_num, mo_num,n_points_final_grid)]
-! implicit none
-! integer :: ipoint
-! !$OMP PARALLEL                  &
-! !$OMP DEFAULT (NONE)            &
-! !$OMP PRIVATE (ipoint) & 
-! !$OMP SHARED (n_points_final_grid,gauss_ij_rk_transp,mo_gauss_ij_rk)
-! !$OMP DO SCHEDULE (dynamic)
-! do ipoint = 1, n_points_final_grid
-!  call ao_to_mo(gauss_ij_rk_transp(1,1,ipoint),size(gauss_ij_rk_transp,1),mo_gauss_ij_rk(1,1,ipoint),size(mo_gauss_ij_rk,1))
-! enddo
-! !$OMP END DO
-! !$OMP END PARALLEL
-!
-!END_PROVIDER 
-!
-!BEGIN_PROVIDER [ double precision, mo_gauss_ij_rk_transp, (n_points_final_grid, mo_num, mo_num)]
-! implicit none
-! integer :: i,j,ipoint
-! do ipoint = 1, n_points_final_grid
-!  do j = 1, mo_num
-!   do i = 1, mo_num
-!    mo_gauss_ij_rk_transp(ipoint,i,j) = mo_gauss_ij_rk(i,j,ipoint)
-!   enddo
-!  enddo
-! enddo
-!
-!END_PROVIDER 
-!
--- a/src/tc_bi_ortho/12.tc_bi_ortho.bats
+++ b/src/tc_bi_ortho/12.tc_bi_ortho.bats
@ -0,0 +1,49 @@
+#!/usr/bin/env bats
+
+source $QP_ROOT/tests/bats/common.bats.sh
+source $QP_ROOT/quantum_package.rc
+
+
+# Runs cisd then tc_bi_ortho on Ne_tc_scf and compares the printed
+# eigval_right_tc_bi_orth value with the reference energy.
+function run_Ne() {
+  qp set_file Ne_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee Ne_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-128.77020441279302
+  # Keep only what follows the "=" so that eq receives the number, not the label
+  # (presumably eq expects numeric arguments -- confirm in common.bats.sh)
+  energy="$(grep "eigval_right_tc_bi_orth =" Ne_tc_scf.cisd_tc_bi_ortho.out | cut -d "=" -f 2)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "Ne" {
+ run_Ne 
+}
+
+
+# Runs cisd then tc_bi_ortho on C_tc_scf and compares the printed
+# eigval_right_tc_bi_orth value with the reference energy.
+function run_C() {
+  qp set_file C_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee C_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-37.757536149952514
+  # Keep only what follows the "=" so that eq receives the number, not the label
+  # (presumably eq expects numeric arguments -- confirm in common.bats.sh)
+  energy="$(grep "eigval_right_tc_bi_orth =" C_tc_scf.cisd_tc_bi_ortho.out | cut -d "=" -f 2)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "C" {
+ run_C 
+}
+
+# Runs cisd then tc_bi_ortho on O_tc_scf and compares the printed
+# eigval_right_tc_bi_orth value with the reference energy.
+function run_O() {
+  # Select the oxygen EZFIO directory (not the carbon one), to match the
+  # output file name and the reference energy used below
+  qp set_file O_tc_scf 
+  qp run cisd 
+  qp run tc_bi_ortho | tee O_tc_scf.cisd_tc_bi_ortho.out  
+  eref=-74.908518517716161
+  # Keep only what follows the "=" so that eq receives the number, not the label
+  # (presumably eq expects numeric arguments -- confirm in common.bats.sh)
+  energy="$(grep "eigval_right_tc_bi_orth =" O_tc_scf.cisd_tc_bi_ortho.out | cut -d "=" -f 2)"
+  eq $energy $eref 1e-6
+}
+
+
+@test "O" {
+ run_O 
+}
+
--- a/src/tc_bi_ortho/EZFIO.cfg
+++ b/src/tc_bi_ortho/EZFIO.cfg
@ -0,0 +1,11 @@
+[psi_l_coef_bi_ortho]
+interface: ezfio
+doc: Coefficients for the left wave function
+type: double precision
+size: (determinants.n_det,determinants.n_states)
+
+[psi_r_coef_bi_ortho]
+interface: ezfio
+doc: Coefficients for the right wave function
+type: double precision
+size: (determinants.n_det,determinants.n_states)
--- a/src/tc_bi_ortho/NEED
+++ b/src/tc_bi_ortho/NEED
@ -0,0 +1,6 @@
+bi_ort_ints
+bi_ortho_mos
+tc_keywords
+non_hermit_dav
+dav_general_mat
+tc_scf
--- a/src/tc_bi_ortho/compute_deltamu_right.irp.f
+++ b/src/tc_bi_ortho/compute_deltamu_right.irp.f
@ -0,0 +1,53 @@
+program compute_deltamu_right
+
+  ! Driver: selects a Becke grid with 30 radial and 50 angular points, forces
+  ! the wave function to be read (read_wf), then calls delta_right.
+
+  implicit none
+
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  ! touch: tell IRPF90 that these values were changed by hand
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  read_wf = .True.
+  touch read_wf
+
+  PROVIDE N_int 
+  call delta_right()
+
+end
+
+! ---
+
+subroutine delta_right()
+
+  ! Prints j1b_type, j1b_pen and mu_erf, then for every state builds the
+  ! vector returned by get_delta_bitc_right for the right coefficients and
+  ! reorders it with dset_order using psi_bilinear_matrix_order.
+  ! The result is released without being stored: the EZFIO write is disabled.
+
+  implicit none
+  integer                       :: istate
+  double precision, allocatable :: dlt(:,:)
+
+  print *, j1b_type
+  print *, j1b_pen
+  print *, mu_erf
+
+  allocate( dlt(N_det,N_states) )
+  dlt(:,:) = 0.d0
+
+  do istate = 1, N_states
+
+    ! < I_left | H_mu - H | psi_right >
+    call get_delta_bitc_right(psi_det, psi_r_coef_bi_ortho(:,istate), N_det, N_int, dlt(:,istate))
+
+    ! order as QMCCHEM
+    call dset_order(dlt(:,istate), psi_bilinear_matrix_order, N_det)
+
+  enddo
+
+  ! disabled: call ezfio_set_dmc_dress_dmc_delta_h(dlt)
+
+  deallocate(dlt)
+
+end subroutine delta_right
+
+! ---
+
--- a/src/tc_bi_ortho/dressing_vectors_lr.irp.f
+++ b/src/tc_bi_ortho/dressing_vectors_lr.irp.f
@ -0,0 +1,155 @@
+
+! ---
+
+subroutine get_delta_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H_TC - H | Psi_right >
+  !
+  ! psidet  : determinants, (Nint,2,ndet)
+  ! psicoef : coefficients of Psi_right on these determinants
+  ! delta   : on output, for each I, the sum over J of
+  !           psicoef(J) * (htc_tot - h_tot), where htc_tot and h_tot are the
+  !           totals returned by htilde_mu_mat_bi_ortho and hmat_bi_ortho
+  !           for the pair (I,J)
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: i, j
+  double precision               :: h_mono, h_twoe, h_tot
+  double precision               :: htc_mono, htc_twoe, htc_three, htc_tot
+  double precision               :: delta_mat
+
+  print *, ' get_delta_bitc_right ...'
+
+  ! One call of each routine outside the parallel region; the results are
+  ! overwritten below. Presumably this makes sure everything they need is
+  ! built before the OpenMP loop starts -- TODO confirm.
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+  call hmat_bi_ortho         (psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+  delta = 0.d0
+  ! Parallel over i: each iteration of the outer loop writes only delta(i)
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8)   &
+ !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)      &
+ !$OMP PRIVATE(i, j, delta_mat, h_mono, h_twoe, h_tot, &
+ !$OMP        htc_mono, htc_twoe, htc_three, htc_tot)
+  do i = 1, ndet
+    do j = 1, ndet
+
+      ! < I | Htilde | J >
+      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+      ! < I | H | J >
+      call hmat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, h_mono, h_twoe, h_tot)
+
+      delta_mat = htc_tot - h_tot
+
+      delta(i) = delta(i) + psicoef(j) * delta_mat
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end subroutine get_delta_bitc_right
+
+! ---
+
+subroutine get_htc_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H_TC | Psi_right >
+  !
+  ! psidet  : determinants, (Nint,2,ndet)
+  ! psicoef : coefficients of Psi_right on these determinants
+  ! delta   : on output, for each I, the sum over J of psicoef(J) * htc_tot,
+  !           where htc_tot is the total returned by htilde_mu_mat_bi_ortho
+  !           for the pair (I,J)
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: i, j
+  double precision               :: htc_mono, htc_twoe, htc_three, htc_tot
+
+  print *, ' get_htc_bitc_right ...'
+
+  ! One call outside the parallel region; the result is overwritten below.
+  ! Presumably this makes sure everything the routine needs is built before
+  ! the OpenMP loop starts -- TODO confirm.
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+
+  delta = 0.d0
+  ! Parallel over i: each iteration of the outer loop writes only delta(i)
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8)   &
+ !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)      &
+ !$OMP PRIVATE(i, j, htc_mono, htc_twoe, htc_three, htc_tot)
+  do i = 1, ndet
+    do j = 1, ndet
+
+      ! < I | Htilde | J >
+      call htilde_mu_mat_bi_ortho(psidet(1,1,i), psidet(1,1,j), Nint, htc_mono, htc_twoe, htc_three, htc_tot)
+
+      delta(i) = delta(i) + psicoef(j) * htc_tot
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end subroutine get_htc_bitc_right
+
+! ---
+
+subroutine get_h_bitc_right(psidet, psicoef, ndet, Nint, delta)
+  
+  BEGIN_DOC
+  !
+  ! delta(I) = < I_left | H | Psi_right >
+  !
+  ! For each determinant I, sums psicoef(J) * h_tot over all J, where h_tot
+  ! is the total returned by hmat_bi_ortho for the pair (I,J).
+  ! The double loop is executed serially.
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)  :: ndet, Nint
+  double precision,  intent(in)  :: psicoef(ndet)
+  integer(bit_kind), intent(in)  :: psidet(Nint,2,ndet)
+  double precision, intent(out)  :: delta(ndet)
+
+  integer                        :: irow, jcol
+  double precision               :: h_one, h_two, h_all
+  double precision               :: acc
+
+  print *, ' get_h_bitc_right ...'
+
+  ! Two preliminary evaluations on the first determinant; their results are
+  ! overwritten in the loop below
+  call hmat_bi_ortho(psidet(1,1,1), psidet(1,1,1), Nint, h_one, h_two, h_all)
+  call hmat_bi_ortho(psidet(1,1,1), psidet(1,1,1), Nint, h_one, h_two, h_all)
+
+  delta = 0.d0
+
+  ! The OpenMP directives of the sibling routines are disabled here:
+! !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+! !$OMP SHARED(delta, ndet, psidet, psicoef, Nint)    &
+! !$OMP PRIVATE(i, j, h_mono, h_twoe, h_tot)
+  do irow = 1, ndet
+    acc = 0.d0
+    do jcol = 1, ndet
+      ! < I | H | J >
+      call hmat_bi_ortho(psidet(1,1,irow), psidet(1,1,jcol), Nint, h_one, h_two, h_all)
+      acc = acc + psicoef(jcol) * h_all
+    enddo
+    delta(irow) = acc
+  enddo
+! !$OMP END PARALLEL DO
+
+end subroutine get_h_bitc_right
+
+! ---
+
--- a/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/e_corr_bi_ortho.irp.f
@ -0,0 +1,104 @@
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+ BEGIN_PROVIDER [ double precision, e_tilde_00]
+ implicit none
+ BEGIN_DOC
+ ! Total value (htot) returned by htilde_mu_mat_bi_ortho when both
+ ! determinants are HF_bitmask.
+ END_DOC
+ double precision :: h_one, h_two, h_three, h_total
+
+ call htilde_mu_mat_bi_ortho(HF_bitmask, HF_bitmask, N_int, h_one, h_two, h_three, h_total)
+ e_tilde_00 = h_total
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth]
+&BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth_single]
+&BEGIN_PROVIDER [ double precision, e_pt2_tc_bi_orth_double]
+ implicit none 
+ BEGIN_DOC
+ ! Sum, over the determinants of psi_det that are singly or doubly excited
+ ! with respect to HF_bitmask, of
+ !
+ !   < HF | Htilde | i > * < i | Htilde | HF > / ( e_tilde_00 - < i | Htilde | i > )
+ !
+ ! e_pt2_tc_bi_orth collects every contribution; the _single and _double
+ ! variants collect the degree-1 and degree-2 contributions separately.
+ END_DOC
+ integer :: idet, exc_degree
+ double precision :: h_one, h_two, h_three
+ double precision :: h_i0, h_0i, e_ii, contrib
+
+ e_pt2_tc_bi_orth        = 0.d0
+ e_pt2_tc_bi_orth_single = 0.d0
+ e_pt2_tc_bi_orth_double = 0.d0
+
+ do idet = 1, N_det
+  call get_excitation_degree(HF_bitmask, psi_det(1,1,idet), exc_degree, N_int)
+  ! only singles and doubles contribute
+  if(exc_degree /= 1 .and. exc_degree /= 2) cycle
+
+  call htilde_mu_mat_bi_ortho(psi_det(1,1,idet), HF_bitmask,        N_int, h_one, h_two, h_three, h_i0)
+  call htilde_mu_mat_bi_ortho(psi_det(1,1,idet), psi_det(1,1,idet), N_int, h_one, h_two, h_three, e_ii)
+  call htilde_mu_mat_bi_ortho(HF_bitmask,        psi_det(1,1,idet), N_int, h_one, h_two, h_three, h_0i)
+
+  ! first-order coefficient times the coupling back to the reference
+  contrib = (h_i0 / (e_tilde_00 - e_ii)) * h_0i
+
+  e_pt2_tc_bi_orth += contrib
+  if(exc_degree == 1) then
+   e_pt2_tc_bi_orth_single += contrib
+  else
+   e_pt2_tc_bi_orth_double += contrib
+  endif
+ enddo
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_tilde_bi_orth_00]
+ implicit none
+ BEGIN_DOC
+ ! htot returned by htilde_mu_mat_bi_ortho for the pair
+ ! (HF_bitmask, HF_bitmask), shifted by nuclear_repulsion.
+ END_DOC
+ double precision :: h_one, h_two, h_three
+
+ call htilde_mu_mat_bi_ortho(HF_bitmask, HF_bitmask, N_int, h_one, h_two, h_three, e_tilde_bi_orth_00)
+ e_tilde_bi_orth_00 = e_tilde_bi_orth_00 + nuclear_repulsion
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_corr_bi_orth ]
+&BEGIN_PROVIDER [ double precision, e_corr_bi_orth_proj ]
+&BEGIN_PROVIDER [ double precision, e_corr_single_bi_orth ]
+&BEGIN_PROVIDER [ double precision, e_corr_double_bi_orth ]
+ implicit none 
+ BEGIN_DOC
+ ! e_corr_single_bi_orth / e_corr_double_bi_orth : sums over the singly /
+ !   doubly excited determinants (w.r.t. HF_bitmask) of
+ !   reigvec_tc_bi_orth(i,1) * < HF | Htilde | i > / reigvec_tc_bi_orth(1,1)
+ ! e_corr_bi_orth_proj : single + double contributions
+ ! e_corr_bi_orth      : eigval_right_tc_bi_orth(1) - e_tilde_bi_orth_00
+ END_DOC
+ integer :: idet, exc_degree
+ double precision :: h_one, h_two, h_three, h_0i
+ 
+ e_corr_bi_orth        = 0.d0
+ e_corr_single_bi_orth = 0.d0
+ e_corr_double_bi_orth = 0.d0
+
+ do idet = 1, N_det
+  call get_excitation_degree(HF_bitmask, psi_det(1,1,idet), exc_degree, N_int)
+  call htilde_mu_mat_bi_ortho(HF_bitmask, psi_det(1,1,idet), N_int, h_one, h_two, h_three, h_0i)
+  select case(exc_degree)
+   case(1)
+    e_corr_single_bi_orth += reigvec_tc_bi_orth(idet,1) * h_0i/reigvec_tc_bi_orth(1,1)
+   case(2)
+    e_corr_double_bi_orth += reigvec_tc_bi_orth(idet,1) * h_0i/reigvec_tc_bi_orth(1,1)
+  end select
+ enddo
+
+ e_corr_bi_orth_proj = e_corr_single_bi_orth + e_corr_double_bi_orth
+ e_corr_bi_orth      = eigval_right_tc_bi_orth(1) - e_tilde_bi_orth_00
+ END_PROVIDER 
+
+ BEGIN_PROVIDER [ double precision, e_tc_left_right ]
+ implicit none
+ BEGIN_DOC
+ ! \sum_{i,j} leigvec_tc_bi_orth(j,1) < j | Htilde | i > reigvec_tc_bi_orth(i,1),
+ ! divided by \sum_i leigvec_tc_bi_orth(i,1) * reigvec_tc_bi_orth(i,1),
+ ! plus nuclear_repulsion.
+ END_DOC
+ integer :: i_right, j_left
+ double precision :: h_one, h_two, h_three, h_ji, overlap_lr
+
+ e_tc_left_right = 0.d0
+ overlap_lr = 0.d0
+ do i_right = 1, N_det
+  ! left/right overlap used for the final normalisation
+  overlap_lr += reigvec_tc_bi_orth(i_right,1) * leigvec_tc_bi_orth(i_right,1)
+  do j_left = 1, N_det
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,j_left), psi_det(1,1,i_right), N_int, h_one, h_two, h_three, h_ji)
+   e_tc_left_right += h_ji * reigvec_tc_bi_orth(i_right,1) * leigvec_tc_bi_orth(j_left,1)
+  enddo
+ enddo
+
+ e_tc_left_right *= 1.d0/overlap_lr 
+ e_tc_left_right += nuclear_repulsion
+
+ END_PROVIDER 
+
+
+BEGIN_PROVIDER [ double precision, coef_pt1_bi_ortho, (N_det)]
+ implicit none
+ BEGIN_DOC
+ ! coef_pt1_bi_ortho(i) = < i | Htilde | HF > / ( e_tilde_00 - < i | Htilde | i > )
+ ! for every determinant i of psi_det that differs from HF_bitmask,
+ ! and 1 for the determinant equal to HF_bitmask (excitation degree 0).
+ END_DOC
+ integer :: idet, exc_degree
+ double precision :: h_one, h_two, h_three, h_i0, e_ii
+
+ do idet = 1, N_det
+  call get_excitation_degree(HF_bitmask, psi_det(1,1,idet), exc_degree, N_int)
+  if(exc_degree /= 0) then
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,idet), HF_bitmask,        N_int, h_one, h_two, h_three, h_i0)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,idet), psi_det(1,1,idet), N_int, h_one, h_two, h_three, e_ii)
+   coef_pt1_bi_ortho(idet) = h_i0 / (e_tilde_00 - e_ii)
+  else
+   coef_pt1_bi_ortho(idet) = 1.d0
+  endif
+ enddo
+END_PROVIDER
--- a/src/tc_bi_ortho/h_biortho.irp.f
+++ b/src/tc_bi_ortho/h_biortho.irp.f
@ -0,0 +1,243 @@
+
+! --
+
+subroutine hmat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i > where | key_j > is developed on the LEFT basis and | key_i > is developed on the RIGHT basis
+  !
+  ! hmono, htwoe : one- and two-electron parts
+  ! htot         : hmono + htwoe, plus nuclear_repulsion when the two
+  !                determinants are identical
+  !
+  ! All outputs are zero when the determinants differ by more than a
+  ! double excitation.
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+
+  integer                       :: exc_degree 
+
+  hmono = 0.d0
+  htwoe = 0.d0
+  htot  = 0.d0
+
+  call get_excitation_degree(key_i, key_j, exc_degree, Nint)
+
+  ! dispatch on the excitation degree between the two determinants
+  select case(exc_degree)
+
+    case(0)
+      call diag_hmat_bi_ortho(Nint, key_i, hmono, htwoe)
+      htot = htot + nuclear_repulsion
+
+    case(1)
+      call single_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+    case(2)
+      call double_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+    case default
+      return
+
+  end select
+
+  htot += hmono + htwoe
+
+  return
+end subroutine hmat_bi_ortho
+
+! ---
+
+subroutine diag_hmat_bi_ortho(Nint, key_i, hmono, htwoe)
+
+  BEGIN_DOC
+  !
+  ! One-electron (hmono) and two-electron (htwoe) parts of the diagonal
+  ! element associated with determinant key_i, built from mo_bi_ortho_one_e
+  ! and mo_bi_ortho_coul_e over the occupied orbitals of key_i.
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                        :: occ_list(Nint*bit_kind_size,2)
+  integer                        :: n_elec(2), spin, k, l, orb_k, orb_l
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  call bitstring_to_list_ab(key_i, occ_list, n_elec, Nint)
+
+  ! one-body: every occupied spin-orbital
+  do spin = 1, 2 
+    do k = 1, n_elec(spin)
+      orb_k = occ_list(k,spin) 
+      hmono += mo_bi_ortho_one_e(orb_k,orb_k)
+    enddo
+  enddo
+
+  ! two-body, opposite spins: all (spin 1, spin 2) pairs, no exchange
+  do k = 1, n_elec(1)
+    orb_k = occ_list(k,1) 
+    do l = 1, n_elec(2)
+      orb_l = occ_list(l,2) 
+      htwoe += mo_bi_ortho_coul_e(orb_l,orb_k,orb_l,orb_k) 
+    enddo
+  enddo
+ 
+  ! two-body, same spin: unique pairs, direct minus exchange
+  ! (spin 1 first, then spin 2)
+  do spin = 1, 2
+    do k = 1, n_elec(spin)
+      orb_k = occ_list(k,spin) 
+      do l = k+1, n_elec(spin)
+        orb_l = occ_list(l,spin) 
+        htwoe += mo_bi_ortho_coul_e(orb_k,orb_l,orb_k,orb_l) - mo_bi_ortho_coul_e(orb_k,orb_l,orb_l,orb_k)
+      enddo
+    enddo
+  enddo
+
+  return
+end subroutine diag_hmat_bi_ortho
+
+! ---
+
+subroutine single_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i > for single excitation 
+  !
+  ! hmono and htwoe are both returned as zero when key_i and key_j do not
+  ! differ by exactly one excitation.
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                       :: occ_i(Nint*bit_kind_size,2), n_elec(2)
+  integer                       :: exc_degree, exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  integer                       :: k, orb, opp_spin
+  double precision              :: phase
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  call get_excitation_degree(key_i, key_j, exc_degree, Nint)
+  if(exc_degree .ne. 1) return
+
+  ! occupied orbitals of key_i, then hole h1 / particle p1 / spin s1
+  call bitstring_to_list_ab(key_i, occ_i, n_elec, Nint)
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 1, h1, p1, h2, p2, s1, s2)
+
+  hmono = mo_bi_ortho_one_e(p1,h1) * phase
+
+  ! two-body, electrons of the spin opposite to the excited one
+  opp_spin = 3 - s1
+  if(s1 == 1) then
+    do k = 1, n_elec(opp_spin)
+      orb = occ_i(k,opp_spin) 
+      htwoe += mo_bi_ortho_coul_e(orb,p1,orb,h1) 
+    enddo
+  else
+    do k = 1, n_elec(opp_spin)
+      orb = occ_i(k,opp_spin) 
+      htwoe += mo_bi_ortho_coul_e(p1,orb,h1,orb) 
+    enddo
+  endif
+
+  ! two-body, electrons of the same spin: direct minus exchange
+  do k = 1, n_elec(s1)
+    orb = occ_i(k,s1) 
+    htwoe += mo_bi_ortho_coul_e(orb,p1,orb,h1) - mo_bi_ortho_coul_e(p1,orb,orb,h1) 
+  enddo
+   
+  htwoe *= phase
+
+end subroutine single_hmat_bi_ortho
+
+! ---
+
+subroutine double_hmat_bi_ortho(Nint, key_j, key_i, hmono, htwoe)
+
+  BEGIN_DOC
+  !
+  ! < key_j | H | key_i> for double excitation
+  !
+  ! hmono is always zero. htwoe is the phase times
+  !   mo_bi_ortho_coul_e(p2,p1,h2,h1)                                  if s1 /= s2
+  !   mo_bi_ortho_coul_e(p2,p1,h2,h1) - mo_bi_ortho_coul_e(p1,p2,h2,h1) if s1 == s2
+  ! and zero when key_i and key_j are not related by a double excitation.
+  ! 
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe
+
+  integer                       :: degree, exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+
+  hmono = 0.d0
+  htwoe = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree .ne. 2) then
+    return
+  endif
+
+  ! The occupation list of key_i is not needed for a double excitation:
+  ! only the two holes, the two particles and the phase enter the element.
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  ! direct term
+  htwoe = mo_bi_ortho_coul_e(p2,p1,h2,h1) 
+
+  ! exchange term, same-spin double excitations only
+  if(s1 == s2) then
+    htwoe = htwoe - mo_bi_ortho_coul_e(p1,p2,h2,h1) 
+  endif
+
+  htwoe *= phase
+
+end subroutine double_hmat_bi_ortho
+
+! ---
+
+
--- a/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
+++ b/src/tc_bi_ortho/h_tc_bi_ortho_psi.irp.f
@ -0,0 +1,92 @@
+subroutine htc_bi_ortho_calc_tdav(v, u, N_st, sze)
+
+  use bitmasks
+
+  BEGIN_DOC
+    ! Application of H_TC on a vector 
+    !
+    ! v(i,istate) = \sum_j u(j,istate) H_TC(i,j), with: 
+    !   H_TC(i,j) = < Di | H_TC | Dj > 
+    !
+    ! v is fully overwritten (it is zeroed before accumulation).
+    !
+    ! The matrix element does not depend on the state, so it is computed once
+    ! per (i,j) pair and reused for all N_st states. The parallel loop runs
+    ! over the rows i, so the work is shared among threads even when N_st = 1;
+    ! each thread only writes the rows it owns.
+    !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: N_st, sze
+  double precision, intent(in)    :: u(sze,N_st)
+  double precision, intent(inout) :: v(sze,N_st)
+
+  integer                         :: i, j, istate
+  double precision                :: htot
+
+  PROVIDE N_int 
+  PROVIDE psi_det
+
+
+ ! TODO : transform it with the bi-linear representation in terms of alpha-beta. 
+
+  ! Preliminary call outside the parallel region (kept from the original
+  ! implementation; presumably a warm-up for what the routine depends on --
+  ! TODO confirm).
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+
+  v = 0.d0
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+ !$OMP SHARED(N_st, sze, N_int, psi_det, u, v)       &
+ !$OMP PRIVATE(istate, i, j, htot)
+  do i = 1, sze
+    do j = 1, sze
+      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+      do istate = 1, N_st
+        v(i,istate) = v(i,istate) + htot * u(j,istate)
+      enddo
+    enddo 
+  enddo
+ !$OMP END PARALLEL DO
+
+end 
+
+subroutine htcdag_bi_ortho_calc_tdav(v, u, N_st, sze)
+
+  use bitmasks
+
+  BEGIN_DOC
+    ! Application of (H_TC)^dagger on a vector 
+    !
+    ! v(i,istate) = \sum_j u(j,istate) H_TC(j,i), with: 
+    !   H_TC(i,j) = < Di | H_TC | Dj > 
+    !
+    ! v is fully overwritten (it is zeroed before accumulation).
+    !
+    ! The matrix element does not depend on the state, so it is computed once
+    ! per (i,j) pair and reused for all N_st states. The parallel loop runs
+    ! over the rows i, so the work is shared among threads even when N_st = 1;
+    ! each thread only writes the rows it owns.
+    !
+  END_DOC
+
+  implicit none
+
+  integer, intent(in)             :: N_st, sze
+  double precision, intent(in)    :: u(sze,N_st)
+  double precision, intent(inout) :: v(sze,N_st)
+
+  integer                         :: i, j, istate
+  double precision                :: htot
+
+  PROVIDE N_int
+  PROVIDE psi_det
+
+  ! Preliminary call outside the parallel region (kept from the original
+  ! implementation; presumably a warm-up for what the routine depends on --
+  ! TODO confirm).
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,j), N_int, htot)
+
+  v = 0.d0
+
+ !$OMP PARALLEL DO DEFAULT(NONE) SCHEDULE(dynamic,8) &
+ !$OMP SHARED(N_st, sze, N_int, psi_det, u, v)       &
+ !$OMP PRIVATE(istate, i, j, htot)
+  do i = 1, sze
+    do j = 1, sze
+      ! transposed element: determinant j on the left, i on the right
+      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,j), psi_det(1,1,i), N_int, htot)
+      do istate = 1, N_st
+        v(i,istate) = v(i,istate) + htot * u(j,istate)
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+end 
+
--- a/src/tc_bi_ortho/normal_ordered.irp.f
+++ b/src/tc_bi_ortho/normal_ordered.irp.f
@ -0,0 +1,319 @@
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC 
+  ! Normal ordering of the three body interaction on the HF density
+  !
+  ! For every quadruple (h1,p1,h2,p2) of active orbitals (taken from list_act):
+  !
+  !   normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5 * (hthree_aba + hthree_aab + hthree_aaa)
+  !
+  ! where the three terms come from give_aba_contraction, give_aab_contraction
+  ! and give_aaa_contraction. Elements involving non-active orbitals stay zero.
+  !
+  ! The occupied orbitals are those of ref_bitmask; when core_tc_op is true,
+  ! ref_bitmask xor core_bitmask is used instead.
+  END_DOC 
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+
+  integer :: i,h1,p1,h2,p2
+  integer :: hh1,hh2,pp1,pp2
+  integer                        :: Ne(2)
+  integer,           allocatable :: occ(:,:)
+  integer(bit_kind), allocatable :: key_i_core(:,:)
+  double precision :: hthree_aba,hthree_aaa,hthree_aab
+  double precision :: wall0,wall1
+ 
+  PROVIDE N_int
+
+  allocate( occ(N_int*bit_kind_size,2) )
+  allocate( key_i_core(N_int,2) )
+
+  ! Build the list of occupied orbitals (occ) and their number per spin (Ne)
+  if(core_tc_op) then
+    do i = 1, N_int
+      key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1))
+      key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2))
+    enddo
+    call bitstring_to_list_ab(key_i_core,occ,Ne,N_int)
+  else
+    call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+  endif
+
+  normal_two_body_bi_orth = 0.d0
+  print*,'Providing normal_two_body_bi_orth ...'
+  call wall_time(wall0)
+
+  ! The outermost loop (hh1) is distributed statically among the threads;
+  ! each (p2,h2,p1,h1) element is written exactly once.
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (hh1, h1, hh2, h2, pp1, p1, pp2, p2, hthree_aba, hthree_aab, hthree_aaa) & 
+ !$OMP SHARED (N_int, n_act_orb, list_act, Ne, occ, normal_two_body_bi_orth)
+ !$OMP DO SCHEDULE (static) 
+  do hh1 = 1, n_act_orb
+    h1 = list_act(hh1) 
+    do pp1 = 1, n_act_orb
+      p1 = list_act(pp1)
+      do hh2 = 1, n_act_orb
+        h2 = list_act(hh2) 
+        do pp2 = 1, n_act_orb
+          p2 = list_act(pp2)
+          ! opposite spin double excitations 
+          call give_aba_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aba)
+          ! same spin double excitations with opposite spin contributions 
+          ! when h1 < h2 and p1 > p2 the two holes are passed in swapped order
+          if(h1<h2.and.p1.gt.p2)then
+           call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2
+           ! same spin double excitations with same spin contributions 
+           ! only evaluated when Ne(2) >= 3, otherwise set to zero
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2
+           else
+             hthree_aaa = 0.d0
+           endif
+          else
+           call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab)
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa)
+           else
+             hthree_aaa = 0.d0
+           endif
+          endif
+          normal_two_body_bi_orth(p2,h2,p1,h1) = 0.5d0*(hthree_aba + hthree_aab + hthree_aaa)
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  call wall_time(wall1)
+  print*,'Wall time for normal_two_body_bi_orth ',wall1-wall0
+
+  deallocate( occ )
+  deallocate( key_i_core )
+
+END_PROVIDER 
+
+
+
+subroutine give_aba_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+
+  ! Contraction of the three-body integrals (give_integrals_3_body_bi_ort)
+  ! over the occupied orbitals listed in occ, for the opposite-spin double
+  ! excitation (h1 -> p1, h2 -> p2).
+  !
+  ! Orbitals occ(1:Ne(2),2) enter with weights 2 (direct) and -1 (exchange);
+  ! orbitals occ(Ne(2)+1:Ne(1),1) enter with weights 1 and -0.5.
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: k, orb
+  double precision              :: w3, direct, exch_12, exch_13
+
+  !!!! double alpha/beta
+  hthree = 0.d0
+
+  ! purely closed shell part 
+  do k = 1, Ne(2)
+    orb = occ(k,2)
+    call give_integrals_3_body_bi_ort(orb, p2, p1, orb, h2, h1, w3)
+    direct  = -1.d0 * w3
+    call give_integrals_3_body_bi_ort(p1, p2, orb, orb, h2, h1, w3)
+    exch_13 = -1.d0 * w3
+    call give_integrals_3_body_bi_ort(p2, orb, p1, orb, h2, h1, w3)
+    exch_12 = -1.d0 * w3
+    hthree += 2.d0 * direct - 1.d0 * ( exch_13 + exch_12)
+  enddo
+
+  ! purely open-shell part 
+  do k = Ne(2) + 1, Ne(1)
+    orb = occ(k,1)
+    call give_integrals_3_body_bi_ort(orb, p2, p1, orb, h2, h1, w3)
+    direct  = -1.d0 * w3
+    call give_integrals_3_body_bi_ort(p1, p2, orb, orb, h2, h1, w3)
+    exch_13 = -1.d0 * w3
+    call give_integrals_3_body_bi_ort(p2, orb, p1, orb, h2, h1, w3)
+    exch_12 = -1.d0 * w3
+    hthree += 1.d0 * direct - 0.5d0* ( exch_13 + exch_12)
+  enddo
+
+end subroutine give_aba_contraction
+
+
+
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_ab, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Normal ordered two-body sector of the three-body terms for opposite spin double excitations 
+  !
+  ! normal_two_body_bi_orth_ab(p2,h2,p1,h1) is the value returned by
+  ! give_aba_contraction for the active orbitals h1, p1, h2, p2 (list_act);
+  ! all other elements are zero.
+  END_DOC
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: k
+  integer                        :: ih1, ih2, ip1, ip2
+  integer                        :: orb_h1, orb_h2, orb_p1, orb_p2
+  integer                        :: n_occ(2)
+  integer,           allocatable :: occ_list(:,:)
+  integer(bit_kind), allocatable :: det_ref(:,:)
+  double precision               :: contraction
+
+  PROVIDE N_int
+
+  allocate( det_ref(N_int,2) )
+  allocate( occ_list(N_int*bit_kind_size,2) )
+
+  ! reference determinant: ref_bitmask, with the core orbitals flipped out
+  ! (xor with core_bitmask) when core_tc_op is set
+  if(core_tc_op) then
+    do k = 1, N_int
+      det_ref(k,1) = xor(ref_bitmask(k,1),core_bitmask(k,1))
+      det_ref(k,2) = xor(ref_bitmask(k,2),core_bitmask(k,2))
+    enddo
+  else
+    do k = 1, N_int
+      det_ref(k,1) = ref_bitmask(k,1)
+      det_ref(k,2) = ref_bitmask(k,2)
+    enddo
+  endif
+  call bitstring_to_list_ab(det_ref, occ_list, n_occ, N_int)
+
+  normal_two_body_bi_orth_ab = 0.d0
+
+  do ih1 = 1, n_act_orb
+    orb_h1 = list_act(ih1) 
+    do ip1 = 1, n_act_orb
+      orb_p1 = list_act(ip1)
+      do ih2 = 1, n_act_orb
+        orb_h2 = list_act(ih2) 
+        do ip2 = 1, n_act_orb
+          orb_p2 = list_act(ip2)
+          call give_aba_contraction(N_int, orb_h1, orb_h2, orb_p1, orb_p2, n_occ, occ_list, contraction)
+          normal_two_body_bi_orth_ab(orb_p2,orb_h2,orb_p1,orb_h1) = contraction
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( det_ref )
+  deallocate( occ_list )
+
+END_PROVIDER 
+
+
+
+BEGIN_PROVIDER [ double precision, normal_two_body_bi_orth_aa_bb, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  ! Normal ordered two-body sector of the three-body terms for same spin double excitations 
+  !
+  ! normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa
+  ! for the active orbitals h1, p1, h2, p2 taken from list_act; all other
+  ! elements are zero.
+  !
+  ! The array is indexed by the orbital labels themselves (not by their
+  ! position in list_act), so it is dimensioned with mo_num like
+  ! normal_two_body_bi_orth and normal_two_body_bi_orth_ab. A dimension of
+  ! n_act_orb would be overrun as soon as an active orbital label exceeds
+  ! n_act_orb.
+  END_DOC
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i,h1,p1,h2,p2
+  integer                        :: hh1,hh2,pp1,pp2
+  integer                        :: Ne(2)
+  integer,           allocatable :: occ(:,:)
+  integer(bit_kind), allocatable :: key_i_core(:,:)
+  double precision               :: hthree_aab, hthree_aaa
+
+  PROVIDE N_int
+
+  allocate( key_i_core(N_int,2) )
+  allocate( occ(N_int*bit_kind_size,2) )
+
+  ! occupied orbitals of the reference (core orbitals flipped out through
+  ! xor with core_bitmask when core_tc_op is set)
+  if(core_tc_op)then
+    do i = 1, N_int
+      key_i_core(i,1) = xor(ref_bitmask(i,1),core_bitmask(i,1))
+      key_i_core(i,2) = xor(ref_bitmask(i,2),core_bitmask(i,2))
+    enddo
+    call bitstring_to_list_ab(key_i_core, occ, Ne, N_int)
+  else
+    call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int)
+  endif
+
+  normal_two_body_bi_orth_aa_bb = 0.d0
+  do hh1 = 1, n_act_orb
+    h1 = list_act(hh1) 
+    do pp1 = 1 , n_act_orb
+      p1 = list_act(pp1)
+      do hh2 = 1, n_act_orb
+        h2 = list_act(hh2) 
+        do pp2 = 1 , n_act_orb
+          p2 = list_act(pp2)
+          ! when h1 < h2 and p1 > p2 the two holes are passed in swapped order
+          if(h1<h2.and.p1.gt.p2)then
+           call give_aab_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aab) ! exchange h1<->h2
+           ! same-spin three-body term only evaluated when Ne(2) >= 3
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h2, h1, p1, p2, Ne, occ, hthree_aaa) ! exchange h1<->h2
+           else
+             hthree_aaa = 0.d0
+           endif
+          else
+           call give_aab_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aab)
+           if(Ne(2).ge.3)then
+             call give_aaa_contraction(N_int, h1, h2, p1, p2, Ne, occ, hthree_aaa)
+           else
+             hthree_aaa = 0.d0
+           endif
+          endif
+          normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1) = hthree_aab + hthree_aaa
+        enddo
+      enddo
+    enddo
+  enddo
+
+  deallocate( key_i_core )
+  deallocate( occ )
+
+END_PROVIDER 
+
+
+
+subroutine give_aaa_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+
+  ! Contraction of the three-body integrals (give_integrals_3_body_bi_ort)
+  ! over the occupied orbitals listed in occ, for the double excitation
+  ! (h1 -> p1, h2 -> p2).
+  !
+  ! For each occupied orbital i, six integrals are evaluated with the first
+  ! three arguments being the permutations of (i, p2, p1) and the last three
+  ! fixed to (i, h2, h1); each is multiplied by -1 before being combined.
+  !
+  ! Nint   : passed by the caller, used to size occ
+  ! Ne     : number of occupied orbitals per spin
+  ! occ    : list of occupied orbitals per spin
+  ! hthree : output, sum of the contributions of all orbitals considered
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: ii,i
+  double precision              :: int_direct,int_exc_12,int_exc_13,int_exc_23
+  double precision              :: integral,int_exc_l,int_exc_ll
+
+  hthree = 0.d0
+  ! orbitals occ(1:Ne(2),2): all six permutations enter with unit weight
+  do ii = 1, Ne(2) ! purely closed shell part 
+    i = occ(ii,2)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral)
+    int_exc_l = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral)
+    int_exc_ll= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral)
+    int_exc_12= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral)
+    int_exc_23= -1.d0 * integral
+
+    hthree +=  1.d0 * int_direct + int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23  )
+  enddo
+  ! orbitals occ(Ne(2)+1:Ne(1),1): the direct term keeps unit weight, all the
+  ! permuted terms are scaled by 0.5
+  do ii = Ne(2)+1,Ne(1) ! purely open-shell part 
+    i = occ(ii,1)
+    call give_integrals_3_body_bi_ort(i ,p2,p1,i,h2,h1,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,p1,i ,i,h2,h1,integral)
+    int_exc_l = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,i ,p2,i,h2,h1,integral)
+    int_exc_ll= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p2,i ,p1,i,h2,h1,integral)
+    int_exc_12= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2, i,i,h2,h1,integral)
+    int_exc_13= -1.d0 * integral
+    call give_integrals_3_body_bi_ort(i ,p1,p2,i,h2,h1,integral)
+    int_exc_23= -1.d0 * integral
+
+    hthree +=  1.d0 * int_direct + 0.5d0 * (int_exc_l + int_exc_ll -( int_exc_12+ int_exc_13+ int_exc_23  ))
+  enddo
+
+end subroutine give_aaa_contraction
+
+
+
+subroutine give_aab_contraction(Nint, h1, h2, p1, p2, Ne, occ, hthree)
+
+  ! Contraction of the three-body integrals (give_integrals_3_body_bi_ort)
+  ! over the orbitals occ(1:Ne(2),2), for the double excitation
+  ! (h1 -> p1, h2 -> p2): for each such orbital i, the integral
+  ! (p2,p1,i | h2,h1,i) minus the one with p1 and p2 exchanged, each
+  ! multiplied by -1.
+  !
+  ! Nint   : passed by the caller, used to size occ
+  ! Ne     : number of occupied orbitals per spin
+  ! occ    : list of occupied orbitals per spin
+  ! hthree : output, sum of the contributions
+
+  ! the use statement has to come before implicit none
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer, intent(in)           :: Nint, h1, h2, p1, p2
+  integer, intent(in)           :: Ne(2), occ(Nint*bit_kind_size,2)
+  double precision, intent(out) :: hthree
+  integer                       :: ii, i
+  double precision              :: int_direct, int_exc_23
+  double precision              :: integral
+
+  hthree = 0.d0
+  do ii = 1, Ne(2) ! purely closed shell part 
+    i = occ(ii,2)
+    call give_integrals_3_body_bi_ort(p2,p1,i,h2,h1,i,integral)
+    int_direct = -1.d0 * integral
+    call give_integrals_3_body_bi_ort(p1,p2,i,h2,h1,i,integral)
+    int_exc_23= -1.d0 * integral
+    hthree  +=  1.d0 * int_direct - int_exc_23
+  enddo
+
+end subroutine give_aab_contraction
--- a/src/tc_bi_ortho/print_he_tc_energy.irp.f
+++ b/src/tc_bi_ortho/print_he_tc_energy.irp.f
@ -0,0 +1,142 @@
+
+! ---
+
+program print_he_tc_energy
+
+  ! Driver: prints the AO overlap and the left/right MO overlap
+  ! (print_overlap), then the two energies evaluated by print_energy1.
+
+  implicit none
+
+  call print_overlap()
+
+  call print_energy1()
+
+end 
+
+! ---
+
+subroutine print_overlap()
+
+  ! Prints every element of ao_overlap, then, for every pair (p,q) of
+  ! molecular orbitals, the quantity
+  !   \sum_{mu,nu} mo_l_coef(mu,p) * ao_overlap(mu,nu) * mo_r_coef(nu,q)
+
+  implicit none
+  integer          :: p, q, mu, nu
+  double precision :: s_pq
+
+  print *, ' ao_overlap:'
+  do p = 1, ao_num
+    do q = 1, ao_num
+      print *, q, p, ao_overlap(q,p)
+    enddo
+  enddo
+
+  print *, ' mo_overlap:'
+  do p = 1, mo_num
+    do q = 1, mo_num
+
+      ! left orbital p against right orbital q
+      s_pq = 0.d0
+      do mu = 1, ao_num
+        do nu = 1, ao_num
+          s_pq += mo_l_coef(mu,p) * ao_overlap(mu,nu) * mo_r_coef(nu,q)
+        enddo
+      enddo
+      
+      print *, p, q, s_pq
+    enddo
+  enddo
+
+end subroutine print_overlap
+
+! ---
+
+subroutine print_energy1()
+
+  ! Prints two energies built from the first left and right molecular
+  ! orbitals only (mo_l_coef(:,1), mo_r_coef(:,1)):
+  !
+  !   ' tc      energy' : ( 2 * h_11 * s_11 + two-electron term on ao_two_e_tc_tot ) / s_11**2
+  !   ' non-sym energy' : ( same contraction on ao_non_hermit_term_chemist )         / s_11**2
+  !
+  ! with h_11 and s_11 the left/right contractions of ao_one_e_integrals and
+  ! ao_overlap.
+
+  implicit none
+  integer          :: i, j, k, l
+  double precision :: e, n, e_ns
+  double precision :: h_11, s_11
+
+  ! one-electron integral and overlap between the first left and right orbitals
+  h_11 = 0.d0
+  s_11 = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      h_11 += mo_l_coef(i,1) * ao_one_e_integrals(i,j) * mo_r_coef(j,1)
+      s_11 += mo_l_coef(i,1) * ao_overlap(i,j)         * mo_r_coef(j,1)
+    enddo
+  enddo
+
+  e = 0.d0
+
+  ! < phi_1 phi_1 | h1 | phi_1 phi_1 > 
+  e += h_11 * s_11
+
+  ! < phi_1 phi_1 | h2 | phi_1 phi_1 > 
+  e += h_11 * s_11
+
+  ! two-electron contributions
+  e_ns = 0.d0
+  do i = 1, ao_num
+    do j = 1, ao_num
+      do k = 1, ao_num
+        do l = 1, ao_num
+
+          ! ao_two_e_tc_tot(i,j,k,l) = <k i| V^TC(r_12) |l j>
+          e += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_two_e_tc_tot(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1)
+
+          e_ns += mo_l_coef(i,1) * mo_l_coef(k,1) * ao_non_hermit_term_chemist(i,j,k,l) * mo_r_coef(j,1) * mo_r_coef(l,1)
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! < phi_1 phi_1 | phi_1 phi_1 >
+  n = 0.d0
+  n += s_11 * s_11
+
+  e    = e    / n 
+  e_ns = e_ns / n
+
+  print *, ' tc      energy = ', e
+  print *, ' non-sym energy = ', e_ns
+
+end subroutine print_energy1
+
+! ---
+
+
--- a/src/tc_bi_ortho/print_tc_wf.irp.f
+++ b/src/tc_bi_ortho/print_tc_wf.irp.f
@ -0,0 +1,104 @@
+program print_tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Writes the left and right wave function coefficients to a file through
+! write_l_r_wf, after forcing read_wf = .True. and the grid settings
+! my_grid_becke = .True., my_n_pt_r_grid = 30, my_n_pt_a_grid = 50.
+  END_DOC
+  print *, 'Hello world'
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! the modified entities are touched so that what depends on them is rebuilt
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!  if(three_body_h_tc)then
+!   call provide_all_three_ints_bi_ortho
+!  endif
+!  call routine
+ call write_l_r_wf
+end
+
+subroutine write_l_r_wf
+ implicit none
+ BEGIN_DOC
+ ! Writes one line per determinant index i = 1, N_det into the file
+ ! <ezfio_filename>.tc_wf, containing
+ !   i, psi_l_coef_sorted_bi_ortho_left(i), psi_r_coef_sorted_bi_ortho_right(i)
+ ! The unit is closed once all lines are written.
+ END_DOC
+ character*(128) :: output
+ integer :: i_unit_output,getUnitAndOpen
+ integer :: i
+
+ output=trim(ezfio_filename)//'.tc_wf'
+ i_unit_output = getUnitAndOpen(output,'w')
+
+ print*,'Writing the left-right wf'
+ do i = 1, N_det
+  write(i_unit_output,*)i,psi_l_coef_sorted_bi_ortho_left(i),psi_r_coef_sorted_bi_ortho_right(i)
+ enddo
+
+ ! release the unit so that the file is flushed and the unit can be reused
+ close(i_unit_output)
+
+end
+
+subroutine routine
+ implicit none
+ BEGIN_DOC
+ ! For every determinant of psi_det that is singly or doubly excited with
+ ! respect to HF_bitmask, prints the excitation (spins, holes, particles),
+ ! the first-order perturbative coefficient, the coefficient taken from
+ ! psi_r_coef_bi_ortho (normalised by its first element) and the two
+ ! corresponding energy contributions. Totals are printed at the end,
+ ! together with the sums of the positive contributions and of the positive
+ ! contributions for which h1 or h2 equals 1.
+ END_DOC
+ integer :: i,degree
+ integer          :: exc(0:2,2,2),h1,p1,s1,h2,p2,s2
+ double precision :: hmono,htwoe,hthree,htilde_ij,coef_pt1,e_i0,delta_e,e_pt2
+ double precision :: contrib_pt,e_corr,coef,contrib,phase
+ double precision :: accu_positive,accu_positive_pt, accu_positive_core,accu_positive_core_pt
+ e_pt2 = 0.d0
+ ! e_corr is accumulated below and must start from zero
+ e_corr = 0.d0
+ accu_positive = 0.D0
+ accu_positive_pt = 0.D0
+ accu_positive_core = 0.d0
+ accu_positive_core_pt = 0.d0
+ 
+ do i = 1, N_det
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+   if(degree == 1 .or. degree == 2)then
+    ! first-order coefficient: < i | Htilde | HF > / ( e_tilde_00 - < i | Htilde | i > )
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i),psi_det(1,1,i),N_int,hmono,htwoe,hthree,e_i0)
+    delta_e = e_tilde_00 - e_i0
+    coef_pt1 = htilde_ij / delta_e
+ 
+    ! from here on htilde_ij holds < HF | Htilde | i >
+    call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,htilde_ij)
+    contrib_pt = coef_pt1 * htilde_ij
+    e_pt2 += contrib_pt
+ 
+    coef = psi_r_coef_bi_ortho(i,1)/psi_r_coef_bi_ortho(1,1)
+    contrib = coef * htilde_ij
+    e_corr += contrib
+    call get_excitation(HF_bitmask,psi_det(1,1,i),exc,degree,phase,N_int)
+    call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+    print*,'*********'
+    if(degree==1)then
+     print*,'s1',s1
+     print*,'h1,p1 = ',h1,p1
+    else if(degree ==2)then
+     print*,'s1',s1
+     print*,'h1,p1 = ',h1,p1
+     print*,'s2',s2
+     print*,'h2,p2 = ',h2,p2
+    endif
+    print*,'coef_pt1 = ',coef_pt1
+    print*,'coef     = ',coef
+    print*,'contrib_pt ',contrib_pt
+    print*,'contrib  = ',contrib
+    if(contrib.gt.0.d0)then
+     accu_positive    += contrib
+     if(h1==1.or.h2==1)then
+      accu_positive_core += contrib
+     endif
+     if(dabs(contrib).gt.1.d-5)then
+      print*,'Found a positive contribution to correlation energy !!'
+     endif
+    endif
+    if(contrib_pt.gt.0.d0)then
+     accu_positive_pt += contrib_pt
+     if(h2==1.or.h1==1)then
+      accu_positive_core_pt += contrib_pt
+     endif
+    endif
+   endif
+ enddo
+ print*,''
+ print*,''
+ print*,'Total correlation energy            = ',e_corr
+ print*,'Total correlation energy PT         = ',e_pt2
+ print*,'Positive contribution to ecorr      = ',accu_positive
+ print*,'Positive contribution to ecorr PT   = ',accu_positive_pt
+ print*,'Pure core contribution              = ',accu_positive_core
+ print*,'Pure core contribution PT           = ',accu_positive_core_pt
+end
--- a/src/tc_bi_ortho/psi_det_tc_sorted.irp.f
+++ b/src/tc_bi_ortho/psi_det_tc_sorted.irp.f
@ -0,0 +1,157 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_tc, (psi_det_size) ]
+  implicit none
+  BEGIN_DOC
+  ! State-averaged weight of each determinant in the bi-orthogonal wave function:
+  !
+  !   w(i) = sum_k state_average_weight(k) * |c_L(i,k) * c_R(i,k)|
+  !
+  ! The first N_det entries are rescaled so that they sum to one; entries
+  ! beyond N_det are zero.
+  END_DOC
+  integer                        :: i, k
+  double precision               :: scale_fac
+
+  psi_average_norm_contrib_tc(:) = 0.d0
+
+  ! Accumulate the weighted |left*right| products, state after state.
+  do k = 1, N_states
+    do i = 1, N_det
+      psi_average_norm_contrib_tc(i) = psi_average_norm_contrib_tc(i) +    &
+          dabs(psi_l_coef_bi_ortho(i,k)*psi_r_coef_bi_ortho(i,k))*state_average_weight(k)
+    enddo
+  enddo
+
+  ! Normalise to a unit sum over the N_det determinants.
+  scale_fac = 1.d0/sum(psi_average_norm_contrib_tc(1:N_det))
+  psi_average_norm_contrib_tc(1:N_det) = psi_average_norm_contrib_tc(1:N_det)*scale_fac
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc, (psi_det_size,N_states) ]
+&BEGIN_PROVIDER [ double precision, psi_average_norm_contrib_sorted_tc, (psi_det_size) ]
+&BEGIN_PROVIDER [ integer, psi_det_sorted_tc_order, (psi_det_size) ]
+   implicit none
+   BEGIN_DOC
+   ! Wave function sorted by state-averaged determinant contribution
+   ! (psi_average_norm_contrib_tc).
+   !
+   ! psi_det_sorted_tc(:,:,i)              : psi_det(:,:,iorder(i))
+   ! psi_average_norm_contrib_sorted_tc(i) : contribution of that determinant
+   ! psi_coef_sorted_tc(i,k)               : sqrt(|c_L * c_R|) of that determinant,
+   !                                         normalised to unit 2-norm for each state
+   ! psi_det_sorted_tc_order(j)            : position in the sorted list of
+   !                                         determinant j of psi_det
+   !
+   ! Entries beyond N_det are set to zero.
+   END_DOC
+   integer                        :: i, k
+   integer, allocatable           :: iorder(:)
+   double precision               :: accu
+
+   allocate ( iorder(N_det) )
+
+   ! The keys are negated before sorting and restored afterwards, so that
+   ! the largest contributions come first (dsort presumably sorts in
+   ! increasing order).
+   do i = 1, N_det
+     iorder(i) = i
+     psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_tc(i)
+   enddo
+   call dsort(psi_average_norm_contrib_sorted_tc,iorder,N_det)
+
+   do i = 1, N_det
+     psi_det_sorted_tc(1:N_int,1,i) = psi_det(1:N_int,1,iorder(i))
+     psi_det_sorted_tc(1:N_int,2,i) = psi_det(1:N_int,2,iorder(i))
+     psi_average_norm_contrib_sorted_tc(i) = -psi_average_norm_contrib_sorted_tc(i)
+     ! inverse permutation: original index -> sorted position
+     psi_det_sorted_tc_order(iorder(i)) = i
+   enddo
+
+   ! Effective coefficients, normalised state by state.
+   do k = 1, N_states
+     accu = 0.d0
+     do i = 1, N_det
+       psi_coef_sorted_tc(i,k) = dsqrt(dabs(psi_l_coef_bi_ortho(iorder(i),k)*psi_r_coef_bi_ortho(iorder(i),k)))
+       accu += psi_coef_sorted_tc(i,k)**2
+     enddo
+     accu = 1.d0/dsqrt(accu)
+     psi_coef_sorted_tc(1:N_det,k) = psi_coef_sorted_tc(1:N_det,k)*accu
+   enddo
+
+   ! Clear the unused tail of every array.
+   psi_det_sorted_tc(:,:,N_det+1:psi_det_size) = 0_bit_kind
+   psi_coef_sorted_tc(N_det+1:psi_det_size,:) = 0.d0
+   psi_average_norm_contrib_sorted_tc(N_det+1:psi_det_size) = 0.d0
+   psi_det_sorted_tc_order(N_det+1:psi_det_size) = 0
+
+   deallocate(iorder)
+
+END_PROVIDER
+
+ BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho, (psi_det_size, N_states)]
+&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho, (psi_det_size, N_states)]
+ BEGIN_DOC
+ ! psi_r_coef_sorted_bi_ortho : right coefficients corresponding to psi_det_sorted_tc
+ ! psi_l_coef_sorted_bi_ortho : left  coefficients corresponding to psi_det_sorted_tc
+ !
+ ! All N_states states are filled; entries beyond N_det are zero.
+ END_DOC
+   implicit none
+   integer                       :: i, k, i_sorted
+   psi_r_coef_sorted_bi_ortho = 0.d0
+   psi_l_coef_sorted_bi_ortho = 0.d0
+   ! psi_det_sorted_tc_order is built as order(iorder(i)) = i, i.e. it maps
+   ! an index in psi_det to its position in psi_det_sorted_tc.  The
+   ! coefficients must therefore be scattered to that position; using
+   ! order(i) as a gather index would apply the inverse permutation and
+   ! misalign coefficients and determinants.
+   do k = 1, N_states
+    do i = 1, N_det
+     i_sorted = psi_det_sorted_tc_order(i)
+     psi_r_coef_sorted_bi_ortho(i_sorted,k) = psi_r_coef_bi_ortho(i,k)
+     psi_l_coef_sorted_bi_ortho(i_sorted,k) = psi_l_coef_bi_ortho(i,k)
+    enddo
+   enddo
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_bit, (N_int,2,psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, psi_coef_sorted_tc_bit, (psi_det_size,N_states) ]
+   implicit none
+   BEGIN_DOC
+   ! Determinants and coefficients reordered by sort_dets_by_det_search_key,
+   ! i.e. sorted by determinants interpreted as integers.  This ordering is
+   ! used to accelerate the search of a given determinant in the wave
+   ! function, e.g. when applying $\langle i|H|psi \rangle$ in perturbation.
+   !
+   ! The coefficients that are reordered are psi_coef.
+   END_DOC
+   integer                        :: ld_coef
+
+   ld_coef = size(psi_coef,1)
+   call sort_dets_by_det_search_key(N_det, psi_det, psi_coef, ld_coef,       &
+       psi_det_sorted_tc_bit, psi_coef_sorted_tc_bit, N_states)
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_right, (N_int,2,N_det) ]
+&BEGIN_PROVIDER [double precision, psi_r_coef_sorted_bi_ortho_right, (N_det)]
+ implicit none
+ BEGIN_DOC
+ ! psi_det_sorted_tc_right : Slater determinants sorted by decreasing value of |right coefficients| (state 1)
+ ! 
+ ! psi_r_coef_sorted_bi_ortho_right : right wave function (state 1) in the order of psi_det_sorted_tc_right,
+ ! in intermediate normalization (the first, i.e. largest, coefficient is 1)
+ END_DOC
+ integer, allocatable           :: iorder(:)
+ double precision, allocatable :: coef(:)
+ integer :: i,j
+ allocate ( iorder(N_det) , coef(N_det))
+ do i=1,N_det
+   ! Sort key: minus the magnitude, so that the largest coefficients come
+   ! first.  No scaling by psi_r_coef_bi_ortho(1,1): a constant factor
+   ! cannot change the order and produces Inf/NaN keys when it is zero.
+   coef(i) = -dabs(psi_r_coef_bi_ortho(i,1))
+   iorder(i) = i
+ enddo
+ call dsort(coef,iorder,N_det)
+ do i=1,N_det
+   do j=1,N_int
+     psi_det_sorted_tc_right(j,1,i) = psi_det(j,1,iorder(i))
+     psi_det_sorted_tc_right(j,2,i) = psi_det(j,2,iorder(i))
+   enddo
+  ! divide by the coefficient of the leading determinant of the sorted list
+  psi_r_coef_sorted_bi_ortho_right(i) = psi_r_coef_bi_ortho(iorder(i),1)/psi_r_coef_bi_ortho(iorder(1),1)
+ enddo
+ deallocate(iorder, coef)
+END_PROVIDER 
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_det_sorted_tc_left, (N_int,2,N_det) ]
+&BEGIN_PROVIDER [double precision, psi_l_coef_sorted_bi_ortho_left, (N_det)]
+ implicit none
+ BEGIN_DOC
+ ! psi_det_sorted_tc_left : Slater determinants sorted by decreasing value of |left coefficients| (state 1)
+ ! 
+ ! psi_l_coef_sorted_bi_ortho_left : left wave function (state 1) in the order of psi_det_sorted_tc_left,
+ ! in intermediate normalization (the first, i.e. largest, coefficient is 1)
+ END_DOC
+ integer, allocatable           :: iorder(:)
+ double precision, allocatable :: coef(:)
+ integer :: i,j
+ allocate ( iorder(N_det) , coef(N_det))
+ do i=1,N_det
+   ! Sort key: minus the magnitude of the LEFT coefficient.  The key used
+   ! to be scaled by the right coefficient psi_r_coef_bi_ortho(1,1)
+   ! (copy-paste from the right-sorted provider); a constant factor cannot
+   ! change the order and produces Inf/NaN keys when it is zero.
+   coef(i) = -dabs(psi_l_coef_bi_ortho(i,1))
+   iorder(i) = i
+ enddo
+ call dsort(coef,iorder,N_det)
+ do i=1,N_det
+   do j=1,N_int
+     psi_det_sorted_tc_left(j,1,i) = psi_det(j,1,iorder(i))
+     psi_det_sorted_tc_left(j,2,i) = psi_det(j,2,iorder(i))
+   enddo
+  ! divide by the coefficient of the leading determinant of the sorted list
+  psi_l_coef_sorted_bi_ortho_left(i) = psi_l_coef_bi_ortho(iorder(i),1)/psi_l_coef_bi_ortho(iorder(1),1)
+ enddo
+ deallocate(iorder, coef)
+END_PROVIDER 
--- a/src/tc_bi_ortho/psi_left_qmc.irp.f
+++ b/src/tc_bi_ortho/psi_left_qmc.irp.f
@ -0,0 +1,44 @@
+
+! ---
+
+BEGIN_PROVIDER  [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det,N_states) ]
+
+  BEGIN_DOC
+  ! Left coefficients of the wave function expressed in the bilinear form
+  !  $D_\alpha^\dagger.C.D_\beta$
+  !
+  ! Rows are $\alpha$ determinants and columns are $\beta$.
+  !
+  ! The values are psi_l_coef_bi_ortho rearranged, state by state, with
+  ! dset_order and psi_bilinear_matrix_order.  For a single determinant the
+  ! coefficient of every state is set to 1.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer :: l
+
+  if(N_det .ne. 1) then
+
+    PROVIDE psi_bilinear_matrix_order
+
+    ! Each state is an independent column: copy it, then reorder it in place.
+    do l = 1, N_states
+      psi_bitcleft_bilinear_matrix_values(1:N_det,l) = psi_l_coef_bi_ortho(1:N_det,l)
+      call dset_order(psi_bitcleft_bilinear_matrix_values(1,l), psi_bilinear_matrix_order, N_det)
+    enddo
+
+  else
+
+    psi_bitcleft_bilinear_matrix_values(1,1:N_states) = 1.d0
+
+  endif
+
+END_PROVIDER
+
+! ---
+
--- a/src/tc_bi_ortho/psi_r_l_prov.irp.f
+++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f
@ -0,0 +1,234 @@
+use bitmasks
+
+BEGIN_PROVIDER [ double precision, psi_l_coef_bi_ortho, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Left coefficients of the bi-orthogonal wave function, one column per state.
+  !
+  ! Default value: coefficient 1 on determinant i for state i, 0 elsewhere.
+  ! If read_wf is true, the master process replaces the first N_det rows
+  ! with the left coefficients stored in the tc_bi_ortho section of the
+  ! |EZFIO| file when they exist, and with psi_coef otherwise.
+  ! In an MPI build the array is then broadcast from rank 0.
+  END_DOC
+
+  integer                        :: i,k, N_int2
+  logical                        :: exists
+  character*(64)                 :: label
+
+  PROVIDE read_wf N_det mo_label ezfio_filename nproc
+  ! Default guess: one unit vector per state.
+  psi_l_coef_bi_ortho = 0.d0
+  do i=1,min(N_states,N_det)
+    psi_l_coef_bi_ortho(i,i) = 1.d0
+  enddo
+
+  ! Only the master process touches the EZFIO file.
+  if (mpi_master) then
+    if (read_wf) then
+      call ezfio_has_tc_bi_ortho_psi_l_coef_bi_ortho(exists)
+      ! Disabled: check that the stored MO label matches mo_label.
+!      if (exists) then
+!        call ezfio_has_tc_bi_ortho_mo_label(exists)
+!        if (exists) then
+!          call ezfio_get_tc_bi_ortho_mo_label(label)
+!          exists = (label == mo_label)
+!        endif
+!      endif
+
+      if (exists) then
+
+        ! Read into a (N_det,N_states) buffer, then copy into the
+        ! (psi_det_size,N_states) provider.
+        double precision, allocatable  :: psi_l_coef_bi_ortho_read(:,:)
+        allocate (psi_l_coef_bi_ortho_read(N_det,N_states))
+        print *,  'Read psi_l_coef_bi_ortho', N_det, N_states
+        call ezfio_get_tc_bi_ortho_psi_l_coef_bi_ortho(psi_l_coef_bi_ortho_read)
+        do k=1,N_states
+          do i=1,N_det
+            psi_l_coef_bi_ortho(i,k) = psi_l_coef_bi_ortho_read(i,k)
+          enddo
+        enddo
+        deallocate(psi_l_coef_bi_ortho_read)
+
+      else
+
+        ! No left coefficients stored: fall back to psi_coef.
+        print*, 'psi_l_coef_bi_ortho are psi_coef'
+        do k=1,N_states
+          do i=1,N_det
+            psi_l_coef_bi_ortho(i,k) = psi_coef(i,k)
+          enddo
+        enddo
+
+      endif
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    ! Send the master's coefficients to all the other ranks.
+    include 'mpif.h'
+    integer                        :: ierr
+    call     MPI_BCAST( psi_l_coef_bi_ortho, size(psi_l_coef_bi_ortho), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read psi_l_coef_bi_ortho with MPI'
+    endif
+  IRP_ENDIF
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Right coefficients of the bi-orthogonal wave function, one column per state.
+  !
+  ! Default value: coefficient 1 on determinant i for state i, 0 elsewhere.
+  ! If read_wf is true, the master process replaces the first N_det rows
+  ! with the right coefficients stored in the tc_bi_ortho section of the
+  ! |EZFIO| file when they exist, and with psi_coef otherwise.
+  ! In an MPI build the array is then broadcast from rank 0.
+  END_DOC
+
+  integer                        :: i,k, N_int2
+  logical                        :: exists
+  character*(64)                 :: label
+
+  PROVIDE read_wf N_det mo_label ezfio_filename nproc
+  ! Default guess: one unit vector per state.
+  psi_r_coef_bi_ortho = 0.d0
+  do i=1,min(N_states,N_det)
+    psi_r_coef_bi_ortho(i,i) = 1.d0
+  enddo
+
+  ! Only the master process touches the EZFIO file.
+  if (mpi_master) then
+    if (read_wf) then
+      call ezfio_has_tc_bi_ortho_psi_r_coef_bi_ortho(exists)
+      ! Disabled: check that the stored MO label matches mo_label.
+!      if (exists) then
+!        call ezfio_has_tc_bi_ortho_mo_label(exists)
+!        if (exists) then
+!          call ezfio_get_tc_bi_ortho_mo_label(label)
+!          exists = (label == mo_label)
+!        endif
+!      endif
+
+      if (exists) then
+
+        ! Read into a (N_det,N_states) buffer, then copy into the
+        ! (psi_det_size,N_states) provider.
+        double precision, allocatable  :: psi_r_coef_bi_ortho_read(:,:)
+        allocate (psi_r_coef_bi_ortho_read(N_det,N_states))
+        print *,  'Read psi_r_coef_bi_ortho', N_det, N_states
+        call ezfio_get_tc_bi_ortho_psi_r_coef_bi_ortho(psi_r_coef_bi_ortho_read)
+        do k=1,N_states
+          do i=1,N_det
+            psi_r_coef_bi_ortho(i,k) = psi_r_coef_bi_ortho_read(i,k)
+          enddo
+        enddo
+        deallocate(psi_r_coef_bi_ortho_read)
+
+      else
+
+        ! No right coefficients stored: fall back to psi_coef.
+        print*, 'psi_r_coef_bi_ortho are psi_coef'
+        do k=1,N_states
+          do i=1,N_det
+            psi_r_coef_bi_ortho(i,k) = psi_coef(i,k)
+          enddo
+        enddo
+
+      endif
+    endif
+  endif
+  IRP_IF MPI_DEBUG
+    print *,  irp_here, mpi_rank
+    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
+  IRP_ENDIF
+  IRP_IF MPI
+    ! Send the master's coefficients to all the other ranks.
+    include 'mpif.h'
+    integer                        :: ierr
+    call     MPI_BCAST( psi_r_coef_bi_ortho, size(psi_r_coef_bi_ortho), MPI_DOUBLE_PRECISION, 0, MPI_COMM_WORLD, ierr)
+    if (ierr /= MPI_SUCCESS) then
+      stop 'Unable to read psi_r_coef_bi_ortho with MPI'
+    endif
+  IRP_ENDIF
+END_PROVIDER
+
+
+subroutine save_tc_wavefunction_general(ndet,nstates,psidet,dim_psicoef,psilcoef,psircoef)
+  implicit none
+  BEGIN_DOC
+  !  Save a bi-orthogonal wave function into the |EZFIO| file.
+  !
+  !  ndet, nstates : number of determinants and of states to save
+  !  psidet        : determinants
+  !  dim_psicoef   : leading dimension of psilcoef and psircoef
+  !  psilcoef      : left  coefficients
+  !  psircoef      : right coefficients
+  !
+  !  The determinants go to the determinants section, the left and right
+  !  coefficients to the tc_bi_ortho section.  Only the master process
+  !  writes.  determinants/psi_coef_qp_edit is not written here.
+  END_DOC
+  use bitmasks
+  include 'constants.include.F'
+  integer, intent(in)            :: ndet,nstates,dim_psicoef
+  integer(bit_kind), intent(in)  :: psidet(N_int,2,ndet)
+  double precision, intent(in)   :: psilcoef(dim_psicoef,nstates)
+  double precision, intent(in)   :: psircoef(dim_psicoef,nstates)
+  integer*8, allocatable         :: psi_det_save(:,:,:)
+  double precision, allocatable  :: psil_coef_save(:,:)
+  double precision, allocatable  :: psir_coef_save(:,:)
+
+  double precision               :: accu_norm
+  integer                        :: i,j,k, ndet_qp_edit
+
+  ! Nothing to do on the non-master processes.
+  if (.not.mpi_master) then
+    return
+  endif
+
+  ndet_qp_edit = min(ndet,N_det_qp_edit)
+
+  ! Dimensions and labels
+  call ezfio_set_determinants_N_int(N_int)
+  call ezfio_set_determinants_bit_kind(bit_kind)
+  call ezfio_set_determinants_N_det(ndet)
+  call ezfio_set_determinants_N_det_qp_edit(ndet_qp_edit)
+  call ezfio_set_determinants_n_states(nstates)
+  call ezfio_set_determinants_mo_label(mo_label)
+
+  ! Determinants, bit-copied into 8-byte integers
+  allocate (psi_det_save(N_int,2,ndet))
+  do i = 1, ndet
+    do j = 1, 2
+      do k = 1, N_int
+        psi_det_save(k,j,i) = transfer(psidet(k,j,i),1_8)
+      enddo
+    enddo
+  enddo
+  call ezfio_set_determinants_psi_det(psi_det_save)
+  call ezfio_set_determinants_psi_det_qp_edit(psi_det_save)
+  deallocate (psi_det_save)
+
+  ! Left and right coefficients, packed to a leading dimension of ndet
+  allocate (psil_coef_save(ndet,nstates),psir_coef_save(ndet,nstates))
+  do k = 1, nstates
+    psil_coef_save(1:ndet,k) = psilcoef(1:ndet,k)
+    psir_coef_save(1:ndet,k) = psircoef(1:ndet,k)
+  enddo
+  call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(psil_coef_save)
+  call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(psir_coef_save)
+  deallocate (psil_coef_save,psir_coef_save)
+
+  call write_int(6,ndet,'Saved determinantsi and psi_r/psi_l coef')
+end
+
+subroutine save_tc_bi_ortho_wavefunction
+ implicit none
+ BEGIN_DOC
+ ! Save the current bi-orthogonal wave function (psi_det with its left and
+ ! right coefficients), then save the right coefficients through
+ ! routine_save_right_bi_ortho.
+ END_DOC
+ integer :: ld_coef
+ ld_coef = size(psi_l_coef_bi_ortho, 1)
+ call save_tc_wavefunction_general(N_det, N_states, psi_det, ld_coef, &
+                                   psi_l_coef_bi_ortho, psi_r_coef_bi_ortho)
+ call routine_save_right_bi_ortho
+end
+
+subroutine routine_save_right_bi_ortho
+ implicit none
+ BEGIN_DOC
+ ! Pass the right coefficients of all the states, together with psi_det,
+ ! to save_wavefunction_general_unormalized.
+ END_DOC
+ double precision, allocatable :: coef_tmp(:,:)
+ integer :: k
+ allocate(coef_tmp(N_det, N_states))
+ ! Pack the coefficients to a leading dimension of N_det.
+ do k = 1, N_states
+  coef_tmp(1:N_det,k) = psi_r_coef_bi_ortho(1:N_det,k)
+ enddo
+ call save_wavefunction_general_unormalized(N_det,N_states,psi_det,size(coef_tmp,1),coef_tmp(1,1))
+end
+
+subroutine routine_save_left_right_bi_ortho
+ implicit none
+ BEGIN_DOC
+ ! Pass a two-state set of coefficients, together with psi_det, to
+ ! save_wavefunction_general_unormalized:
+ !
+ ! state 1 : right coefficients of the first state
+ ! state 2 : left  coefficients of the first state
+ END_DOC
+ double precision, allocatable :: coef_tmp(:,:)
+ integer :: n_states_tmp
+ n_states_tmp = 2
+ allocate(coef_tmp(N_det, n_states_tmp))
+ coef_tmp(1:N_det,1) = psi_r_coef_bi_ortho(1:N_det,1)
+ coef_tmp(1:N_det,2) = psi_l_coef_bi_ortho(1:N_det,1)
+ call save_wavefunction_general_unormalized(N_det,n_states_tmp,psi_det,size(coef_tmp,1),coef_tmp(1,1))
+end
+
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
@ -0,0 +1,76 @@
+program save_bitcpsileft_for_qmcchem
+
+  ! Writes the left coefficients in bilinear order to the spindeterminants
+  ! section, then writes a reference energy to <ezfio>/simulation/e_ref.
+  ! The energy is the first available of: fci energy_pt2, fci energy,
+  ! cisd energy, tc_scf bitc_energy; it is 0 if none exists.
+
+  integer          :: iunit
+  logical          :: exists
+  double precision :: e_ref
+
+  print *, ' '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+  print *, '  call save_for_qmcchem before  '
+  print *, ' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
+  print *, ' '
+
+  call write_lr_spindeterminants()
+
+  e_ref = 0.d0
+  iunit = 13
+  open(unit=iunit, file=trim(ezfio_filename)//'/simulation/e_ref', action='write')
+
+  ! Each source is queried only if the previous one is missing.
+  call ezfio_has_fci_energy_pt2(exists)
+  if(exists) then
+    call ezfio_get_fci_energy_pt2(e_ref)
+  else
+    call ezfio_has_fci_energy(exists)
+    if(exists) then
+      call ezfio_get_fci_energy(e_ref)
+    else
+      call ezfio_has_cisd_energy(exists)
+      if(exists) then
+        call ezfio_get_cisd_energy(e_ref)
+      else
+        call ezfio_has_tc_scf_bitc_energy(exists)
+        if(exists) then
+          call ezfio_get_tc_scf_bitc_energy(e_ref)
+        endif
+      endif
+    endif
+  endif
+
+  write(iunit,*) e_ref
+
+  close(iunit)
+
+end
+
+! --
+
+subroutine write_lr_spindeterminants()
+
+  ! Copies psi_bitcleft_bilinear_matrix_values into a temporary
+  ! (N_det,N_states) buffer and stores it in the EZFIO file as
+  ! spindeterminants/psi_left_coef_matrix_values.
+
+  use bitmasks
+
+  implicit none
+
+  integer                       :: l
+  double precision, allocatable :: buffer(:,:)
+
+  PROVIDE psi_bitcleft_bilinear_matrix_values
+
+  allocate(buffer(N_det,N_states))
+  do l = 1, N_states
+    buffer(1:N_det,l) = psi_bitcleft_bilinear_matrix_values(1:N_det,l)
+  enddo
+  call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer)
+  deallocate(buffer)
+
+end subroutine write_lr_spindeterminants
+
+! ---
+
--- a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
+++ b/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
@ -0,0 +1,15 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Forces the wave function to be read (read_wf = .True.), selects the
+! Becke grid with 30 radial and 50 angular points, then calls
+! routine_save_left_right_bi_ortho, which saves the right and left
+! coefficients of the first state as a two-state wave function.
+  END_DOC
+  print *, 'Hello world'
+  ! Override the grid and read_wf settings; the touch statements below
+  ! notify IRPF90 that these values were modified by hand.
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call routine_save_left_right_bi_ortho
+! call test
+end
--- a/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f
+++ b/src/tc_bi_ortho/save_tc_bi_ortho_nat.irp.f
@ -0,0 +1,35 @@
+ program tc_natorb_bi_ortho
+   implicit none
+   BEGIN_DOC
+ ! Forces the wave function to be read (read_wf = .True.), selects the
+ ! Becke grid with 30 radial and 50 angular points, calls
+ ! print_energy_and_mos and then saves the natural orbitals with
+ ! save_tc_natorb.
+   END_DOC
+   print *, 'Hello world'
+   ! Override the grid and read_wf settings; the touch statements below
+   ! notify IRPF90 that these values were modified by hand.
+   my_grid_becke = .True.
+   my_n_pt_r_grid = 30
+   my_n_pt_a_grid = 50
+   read_wf = .True.
+   touch read_wf
+   touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+   call print_energy_and_mos
+   call save_tc_natorb
+!   call minimize_tc_orb_angles
+ end
+ 
+ ! Stores natorb_tc_leigvec_ao and natorb_tc_reigvec_ao in the EZFIO file as
+ ! the left and right MO coefficients of the bi_ortho_mos section, then
+ ! replaces the saved wave function by the single reference determinant and
+ ! sets determinants/read_wf to false.
+ subroutine save_tc_natorb 
+  implicit none
+  print*,'Saving the natorbs '
+  provide natorb_tc_leigvec_ao natorb_tc_reigvec_ao
+  call ezfio_set_bi_ortho_mos_mo_l_coef(natorb_tc_leigvec_ao)
+  call ezfio_set_bi_ortho_mos_mo_r_coef(natorb_tc_reigvec_ao)
+  ! The stored wave function is reset to one determinant and will not be
+  ! read back by default (read_wf = .False.).
+  call save_ref_determinant_nstates_1
+  call ezfio_set_determinants_read_wf(.False.)
+ end
+ 
+ ! Saves a wave function made of ref_bitmask alone, with one state and a
+ ! coefficient equal to 1, through save_wavefunction_general.
+ subroutine save_ref_determinant_nstates_1
+   implicit none
+   use bitmasks
+   ! buffer is dimensioned with N_states, but only buffer(1,1) is used:
+   ! the call below saves 1 determinant and 1 state.
+   double precision               :: buffer(1,N_states)
+   buffer = 0.d0
+   buffer(1,1) = 1.d0
+   call save_wavefunction_general(1,1,ref_bitmask,1,buffer)
+ end
--- a/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/select_dets_bi_ortho.irp.f
@ -0,0 +1,61 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Forces the wave function to be read (read_wf = .True.), selects the
+! Becke grid with 30 radial and 50 angular points, switches off the
+! three-body term (three_body_h_tc = .False.) and calls routine_test,
+! which saves a subset of the determinants of the wave function.
+  END_DOC
+  print *, 'Hello world'
+  ! Override the grid and read_wf settings; the touch statements below
+  ! notify IRPF90 that these values were modified by hand.
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  ! The three-body part of the Hamiltonian is disabled for this program.
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  three_body_h_tc = .False.
+  touch three_body_h_tc 
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+  !!!!!!!!!!!!!!! WARNING NO 3-BODY
+
+  call routine_test
+! call test
+end
+
+subroutine routine_test
+ implicit none
+ use bitmasks ! you need to include the bitmasks_module.f90 features
+ BEGIN_DOC
+ ! Ranks the determinants of psi_det by decreasing |coef_pt1_bi_ortho|,
+ ! single excitations with respect to HF_bitmask being given a fixed
+ ! magnitude of 0.5, keeps those whose magnitude exceeds 1d-15 and saves
+ ! them, with their psi_coef coefficients, as the new wave function.
+ END_DOC
+ integer :: i,n_good,degree
+ integer(bit_kind), allocatable :: dets(:,:,:)
+ integer, allocatable :: iorder(:)
+ double precision, allocatable :: coef(:),coef_new(:,:)
+ double precision :: thr
+
+ ! Sort keys are minus the magnitudes, so the largest come first.
+ allocate(coef(N_det), iorder(N_det))
+ do i = 1, N_det
+  iorder(i) = i
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree.ne.1)then
+   coef(i) = -dabs(coef_pt1_bi_ortho(i))
+  else
+   coef(i) = -0.5d0
+  endif
+ enddo
+ call dsort(coef,iorder,N_det)
+
+ ! Number of determinants above the threshold.
+ thr = 1d-15
+ n_good = count(dabs(coef(1:N_det)).gt.thr)
+ print*,'n_good = ',n_good
+
+ ! Keep the first n_good determinants of the sorted list.
+ allocate(dets(N_int,2,n_good),coef_new(n_good,n_states))
+ do i = 1, n_good
+  dets(:,:,i) = psi_det(:,:,iorder(i))
+  coef_new(i,:) = psi_coef(iorder(i),:)
+ enddo
+ call save_wavefunction_general(n_good,n_states,dets,n_good,coef_new)
+
+end
--- a/src/tc_bi_ortho/slater_tc.irp.f
+++ b/src/tc_bi_ortho/slater_tc.irp.f
@ -0,0 +1,376 @@
+
+! ---
+
+subroutine htilde_mu_mat_bi_ortho_tot(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  ! Total matrix element <key_j | H_tilde | key_i>, where |key_j> is developed
+  ! on the LEFT basis and |key_i> on the RIGHT basis.
+  !
+  ! The result is zero when the two determinants differ by more than a
+  ! double excitation.
+  !
+  ! WARNING : H_tilde is non hermitian, the order of key_j and key_i matters.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer, intent(in)           :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: htot
+  double precision              :: hmono, htwoe, hthree
+  integer :: degree
+
+  htot = 0.d0
+  call get_excitation_degree(key_j, key_i, degree, Nint)
+  if(degree.le.2)then
+    ! Only the total is returned; the individual components are discarded.
+    call htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+  endif
+
+end subroutine htilde_mu_mat_bi_ortho_tot
+
+! --
+
+subroutine htilde_mu_mat_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  !
+  ! Matrix element <key_j | H_tilde | key_i>, where |key_j> is developed on
+  ! the LEFT basis and |key_i> on the RIGHT basis, split into its components:
+  !
+  ! hmono  : one-electron part
+  ! htwoe  : two-electron part
+  ! hthree : three-electron part, computed only if three_body_h_tc is true;
+  !          for double excitations it is skipped when double_normal_ord is true
+  ! htot   : hmono + htwoe + hthree, plus nuclear_repulsion on the diagonal
+  !
+  ! Everything is zero beyond double excitations.
+  !
+  ! WARNING : H_tilde is non hermitian, the order of key_j and key_i matters.
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree 
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  ! One routine per excitation degree for the one- and two-body terms,
+  ! followed by the matching three-body routine when it is requested.
+  select case(degree)
+
+    case(0)
+      call diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+      if(three_body_h_tc) then
+        call diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+      endif
+
+    case(1)
+      call single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+      if(three_body_h_tc) then
+        call single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+      endif
+
+    case(2)
+      call double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+      if(three_body_h_tc) then
+        if(.not.double_normal_ord) then
+          call double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+        endif
+      endif
+
+    case default
+      ! more than a double excitation: all the outputs stay at zero
+      return
+
+  end select
+
+  htot = hmono + htwoe + hthree
+  if(degree==0) then
+    htot += nuclear_repulsion
+  endif
+ 
+end
+
+! ---
+
+subroutine diag_htilde_mu_mat_bi_ortho(Nint, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  ! Diagonal element <key_i | H_tilde | key_i>, ONE- AND TWO-BODY TERMS ONLY.
+  !
+  ! hmono : sum of mo_bi_ortho_tc_one_e(i,i) over the occupied spin-orbitals
+  ! htwoe : opposite-spin direct terms + same-spin (direct - exchange) terms
+  ! htot  : hmono + htwoe
+  !
+  ! The nuclear repulsion is not added here, and the core_tc_op and
+  ! j1b_gauss options are not taken into account.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in)  :: Nint
+  integer(bit_kind), intent(in)  :: key_i(Nint,2)
+  double precision, intent(out)  :: hmono,htwoe,htot
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  double precision               :: get_mo_two_e_integral_tc_int
+  integer(bit_kind)              :: key_i_core(Nint,2)
+
+  ! Lists of occupied alpha (column 1) and beta (column 2) orbitals
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+
+  hmono = 0.d0
+  htwoe = 0.d0
+  htot  = 0.d0
+
+  ! one-body : alpha electrons first, then beta
+  do ispin = 1, 2
+    do i = 1, Ne(ispin)
+      ii = occ(i,ispin)
+      hmono += mo_bi_ortho_tc_one_e(ii,ii)
+    enddo
+  enddo
+
+  ! alpha/beta two-body : i runs over alpha (electron 1), j over beta (electron 2)
+  do i = 1, Ne(1)
+    ii = occ(i,1)
+    do j = 1, Ne(2)
+      jj = occ(j,2)
+      htwoe += mo_bi_ortho_tc_two_e(jj,ii,jj,ii)
+    enddo
+  enddo
+
+  ! same-spin two-body : all alpha pairs, then all beta pairs
+  do ispin = 1, 2
+    do i = 1, Ne(ispin)
+      ii = occ(i,ispin)
+      do j = i+1, Ne(ispin)
+        jj = occ(j,ispin)
+        htwoe += mo_bi_ortho_tc_two_e(ii,jj,ii,jj) - mo_bi_ortho_tc_two_e(ii,jj,jj,ii)
+      enddo
+    enddo
+  enddo
+
+  htot = hmono + htwoe 
+
+end
+
+
+
+subroutine double_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for a double excitation, ONE- AND TWO-BODY TERMS ONLY.
+  !
+  ! hmono is always zero and htot = htwoe.  All the outputs are zero if the
+  ! two determinants do not differ by exactly a double excitation.
+  !
+  ! When double_normal_ord is true and there are more than two alpha
+  ! electrons, the normal_two_body_bi_orth contribution is added to htwoe.
+  !
+  ! WARNING : H_tilde is non hermitian, the order of key_j and key_i matters.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: get_mo_two_e_integral_tc_int,phase
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono = 0.d0
+  htwoe = 0.d0
+  htot  = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.2)then
+   return
+  endif
+
+  ! Electron counts (Ne) and holes/particles/spins of the excitation
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  ! NOTE(review): the index order passed to normal_two_body_bi_orth differs
+  ! between the two branches below; it is kept exactly as it was and should
+  ! be checked against the definition of that tensor.
+  if(s1.eq.s2)then
+   ! same spin : direct minus exchange
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1)
+   if(double_normal_ord.and.Ne(1).gt.2)then
+    htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)
+    htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)
+   endif
+  else
+   ! opposite spin : direct term only
+   htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)
+   if(double_normal_ord.and.Ne(1).gt.2)then
+    htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)
+   endif
+  endif
+
+  htwoe *= phase
+  htot =  htwoe 
+
+end
+
+
+subroutine single_htilde_mu_mat_bi_ortho(Nint, key_j, key_i, hmono, htwoe, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for a single excitation, ONE- AND TWO-BODY TERMS ONLY.
+  !
+  ! hmono : mo_bi_ortho_tc_one_e(p1,h1) times the phase
+  ! htwoe : interaction of the h1 -> p1 excitation with the electrons
+  !         occupied in key_i, times the phase
+  ! htot  : hmono + htwoe
+  !
+  ! All the outputs are zero if the two determinants do not differ by
+  ! exactly a single excitation.  The core_tc_op and j1b_gauss options are
+  ! not taken into account.
+  !
+  ! WARNING : H_tilde is non hermitian, the order of key_j and key_i matters.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, htot
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2), i, j, ii, jj, ispin, jspin, k, kk
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: get_mo_two_e_integral_tc_int, phase
+  double precision              :: direct_int, exchange_int_12, exchange_int_23, exchange_int_13
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_j_core(Nint,2), key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+  hmono = 0.d0
+  htwoe = 0.d0
+  htot  = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+
+  ! Occupied orbitals of key_i and hole/particle/spin of the excitation
+  call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+
+  ! one-body
+  hmono = mo_bi_ortho_tc_one_e(p1,h1) * phase
+
+  ! opposite-spin two-body : the position of the excited electron in the
+  ! integral depends on its spin
+  ispin = other_spin(s1)
+  if(s1.ne.1)then
+   ! single beta : the spectator alpha electron is electron 1
+   do i = 1, Ne(ispin)
+    ii = occ(i,ispin)
+    htwoe += mo_bi_ortho_tc_two_e(p1,ii,h1,ii)
+   enddo
+  else
+   ! single alpha : the spectator beta electron is electron 2
+   do i = 1, Ne(ispin)
+    ii = occ(i,ispin)
+    htwoe += mo_bi_ortho_tc_two_e(ii,p1,ii,h1)
+   enddo
+  endif
+
+  ! same-spin two-body : direct minus exchange
+  do i = 1, Ne(s1)
+   ii = occ(i,s1)
+   htwoe += mo_bi_ortho_tc_two_e(ii,p1,ii,h1) - mo_bi_ortho_tc_two_e(p1,ii,ii,h1) 
+  enddo
+
+  htwoe  *= phase
+  htot = hmono + htwoe 
+
+end
+
+
--- a/src/tc_bi_ortho/slater_tc_3e.irp.f
+++ b/src/tc_bi_ortho/slater_tc_3e.irp.f
@ -0,0 +1,288 @@
+subroutine provide_all_three_ints_bi_ortho
+ implicit none
+ BEGIN_DOC
+! Routine that requests, ahead of time, all the three-electron integral tensors
+! needed by the three-body Slater rules.
+!
+! Nothing is requested when three_body_h_tc is false.
+! When it is true, the 3- and 4-index tensors are always requested, together with
+! either the 5-index tensors (double_normal_ord false) or normal_two_body_bi_orth
+! (double_normal_ord true).
+ END_DOC
+ if(three_body_h_tc)then
+  ! 3-index tensors
+  PROVIDE three_e_3_idx_direct_bi_ort three_e_3_idx_cycle_1_bi_ort three_e_3_idx_cycle_2_bi_ort
+  PROVIDE three_e_3_idx_exch23_bi_ort three_e_3_idx_exch13_bi_ort three_e_3_idx_exch12_bi_ort
+  ! 4-index tensors
+  PROVIDE three_e_4_idx_direct_bi_ort three_e_4_idx_cycle_1_bi_ort three_e_4_idx_cycle_2_bi_ort
+  PROVIDE three_e_4_idx_exch23_bi_ort three_e_4_idx_exch13_bi_ort three_e_4_idx_exch12_bi_ort
+  ! the quantities below are only meaningful with three-body terms switched on,
+  ! so they are requested inside the three_body_h_tc guard
+  if(.not.double_normal_ord)then
+   PROVIDE three_e_5_idx_direct_bi_ort three_e_5_idx_cycle_1_bi_ort three_e_5_idx_cycle_2_bi_ort
+   PROVIDE three_e_5_idx_exch23_bi_ort three_e_5_idx_exch13_bi_ort three_e_5_idx_exch12_bi_ort
+  else
+   PROVIDE normal_two_body_bi_orth
+  endif
+ endif
+end
+
+subroutine diag_htilde_three_body_ints_bi_ort(Nint, key_i, hthree)
+
+  BEGIN_DOC
+  ! Diagonal element <key_i | H_tilde | key_i> restricted to the THREE-BODY terms,
+  ! with bi-orthonormal orbitals.
+  !
+  ! Nint   : number of integers used for each spin part of a determinant
+  ! key_i  : determinant, alpha part in (:,1) and beta part in (:,2)
+  ! hthree : sum of the three-electron contributions over the occupied orbitals
+  !
+  ! When core_tc_op is true, the bits of core_bitmask are flipped (xor) in key_i
+  ! before the list of occupied orbitals is built.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,j,m,ii,jj,mm,ispin,jspin
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: direct_int, exchange_int
+  double precision              :: three_e_diag_parrallel_spin
+
+  hthree = 0.d0
+
+  ! list of occupied orbitals per spin
+  if(core_tc_op)then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core,occ,Ne,Nint)
+  else
+   call bitstring_to_list_ab(key_i,occ,Ne,Nint)
+  endif
+
+  ! no three-body contribution with fewer than three electrons
+  if(Ne(1)+Ne(2).lt.3) return
+
+  ! mixed-spin triplets : ispin=1 -> alpha/alpha/beta, ispin=2 -> beta/beta/alpha
+  do ispin = 1, 2
+   jspin = 3 - ispin ! the other spin
+   do i = 1, Ne(ispin)
+    ii = occ(i,ispin)
+    do j = i+1, Ne(ispin)
+     jj = occ(j,ispin)
+     do m = 1, Ne(jspin)
+      mm = occ(m,jspin)
+      ! only the 3-index tensors are needed (no 6-index tensor)
+      direct_int   = three_e_3_idx_direct_bi_ort(mm,jj,ii)
+      exchange_int = three_e_3_idx_exch12_bi_ort(mm,jj,ii)
+      hthree += direct_int - exchange_int
+     enddo
+    enddo
+   enddo
+  enddo
+
+  ! pure-spin triplets : ispin=1 -> alpha/alpha/alpha, ispin=2 -> beta/beta/beta
+  do ispin = 1, 2
+   do i = 1, Ne(ispin)
+    ii = occ(i,ispin)
+    do j = i+1, Ne(ispin)
+     jj = occ(j,ispin)
+     do m = j+1, Ne(ispin)
+      mm = occ(m,ispin)
+      hthree += three_e_diag_parrallel_spin(mm,jj,ii) ! USES ONLY 3-IDX TENSORS
+     enddo
+    enddo
+   enddo
+  enddo
+
+end
+
+
+subroutine single_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR THREE-BODY TERMS WITH BI ORTHONORMAL ORBITALS
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  ! Nint         : number of integers used for each spin part of a determinant
+  ! key_j, key_i : the two determinants
+  ! hthree       : three-body contribution, multiplied by the phase of the excitation
+  !
+  ! hthree is 0 when key_i and key_j are not related by exactly one single excitation.
+  ! The occupied orbitals are taken from key_i; when core_tc_op is true the bits of
+  ! core_bitmask are first flipped (xor) in key_i.
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,j,ii,jj,ispin
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: direct_int,phase,exchange_int,three_e_single_parrallel_spin 
+  integer                       :: other_spin(2)
+  integer(bit_kind)             :: key_i_core(Nint,2)
+
+  other_spin(1) = 2
+  other_spin(2) = 1
+
+
+  hthree = 0.d0
+  call get_excitation_degree(key_i,key_j,degree,Nint)
+  if(degree.ne.1)then
+   return
+  endif
+  ! only key_i is needed to list the occupied orbitals
+  if(core_tc_op)then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  endif
+
+  ! h1 -> p1 with spin s1 (h2, p2, s2 are not used for a single excitation)
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 1, h1, p1, h2, p2, s1, s2)
+
+   if(Ne(1)+Ne(2).ge.3)then
+     ! hole of spin s1 :: contribution from purely other spin 
+     ispin = other_spin(s1) ! ispin is the other spin than s1
+     do i = 1, Ne(ispin)  ! i runs over the orbitals of the other spin than s1  
+      ii = occ(i,ispin)  
+      do j = i+1, Ne(ispin) ! j also runs over the orbitals of the other spin than s1 
+       jj = occ(j,ispin) 
+       !   is == ispin  in :::   s1 is is  s1 is is      s1 is is s1 is is
+       !                       < h1 j  i | p1 j  i > - < h1 j  i | p1 i j >
+       !                                                   
+       direct_int   = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1)  
+       exchange_int = three_e_4_idx_exch23_bi_ort(jj,ii,p1,h1) 
+       hthree += direct_int - exchange_int
+      enddo
+     enddo
+  
+     ! hole of spin s1 :: contribution from mixed other spin / same spin
+     do i = 1, Ne(ispin) ! other spin 
+      ii = occ(i,ispin)  ! other spin 
+      do j = 1, Ne(s1)   ! same spin 
+       jj = occ(j,s1)    ! same spin 
+       direct_int   = three_e_4_idx_direct_bi_ort(jj,ii,p1,h1)
+       exchange_int = three_e_4_idx_exch13_bi_ort(jj,ii,p1,h1)
+       !              < h1 j  i | p1 j i > - < h1 j i | j p1 i >
+       hthree += direct_int - exchange_int
+      enddo
+     enddo
+
+     ! hole of spin s1 :: PURE SAME SPIN CONTRIBUTIONS !!!
+     do i = 1, Ne(s1)
+      ii = occ(i,s1)
+      do j = i+1, Ne(s1)
+       jj = occ(j,s1)
+       hthree += three_e_single_parrallel_spin(jj,ii,p1,h1) ! USES THE 4-IDX TENSOR 
+      enddo
+     enddo
+   endif
+  hthree  *= phase
+
+end
+
+! ---
+
+subroutine double_htilde_three_body_ints_bi_ort(Nint, key_j, key_i, hthree)
+
+  BEGIN_DOC
+  ! Three-body part of <key_j | H_tilde | key_i> when the two determinants differ
+  ! by a double excitation, with bi-orthonormal orbitals.
+  !
+  ! Nint         : number of integers used for each spin part of a determinant
+  ! key_j, key_i : the two determinants
+  ! hthree       : three-body contribution, multiplied by the phase of the excitation
+  !
+  ! hthree is 0 when the excitation degree between key_i and key_j is not 2.
+  ! The occupied orbitals are taken from key_i; when core_tc_op is true the bits of
+  ! core_bitmask are first flipped (xor) in key_i.
+  !
+  ! WARNING : non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2),key_i(Nint,2)
+  double precision, intent(out) :: hthree
+  integer                       :: occ(Nint*bit_kind_size,2)
+  integer                       :: Ne(2),i,m,mm,ispin
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+  integer(bit_kind)             :: key_i_core(Nint,2)
+  double precision              :: direct_int,exchange_int
+  double precision              :: three_e_double_parrallel_spin
+
+  hthree = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.2) return
+
+  ! list of occupied orbitals per spin
+  if(core_tc_op) then
+   do i = 1, Nint
+    key_i_core(i,1) = xor(key_i(i,1),core_bitmask(i,1))
+    key_i_core(i,2) = xor(key_i(i,2),core_bitmask(i,2))
+   enddo
+   call bitstring_to_list_ab(key_i_core, occ, Ne, Nint)
+  else
+   call bitstring_to_list_ab(key_i, occ, Ne, Nint)
+  endif
+
+  ! (h1 -> p1, spin s1) and (h2 -> p2, spin s2)
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  if(Ne(1)+Ne(2).ge.3)then
+   if(s1.ne.s2)then
+    ! different spin excitation : spectator electron of spin s1 ...
+    do m = 1, Ne(s1)
+     mm = occ(m,s1)
+     direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch13_bi_ort(mm,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+    enddo
+    ! ... then spectator electron of spin s2
+    do m = 1, Ne(s2)
+     mm = occ(m,s2)
+     direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch23_bi_ort(mm,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+    enddo
+   else
+    ! same spin excitation : spectator electron of the other spin ...
+    ispin = 3 - s1
+    do m = 1, Ne(ispin)
+     mm = occ(m,ispin)
+     direct_int   = three_e_5_idx_direct_bi_ort(mm,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(mm,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+    enddo
+    ! ... then spectator electron of spin s1 (pure same-spin contribution)
+    do m = 1, Ne(s1)
+     mm = occ(m,s1)
+     hthree += three_e_double_parrallel_spin(mm,p2,h2,p1,h1)
+    enddo
+   endif
+  endif
+
+  hthree  *= phase
+end
+
+! ---
+
--- a/src/tc_bi_ortho/slater_tc_opt.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt.irp.f
@ -0,0 +1,105 @@
+subroutine htilde_mu_mat_opt_bi_ortho_tot(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and |key_i> is developed on the RIGHT basis
+  !!
+  ! Returns the total matrix element 
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  !
+  ! Thin wrapper around htilde_mu_mat_opt_bi_ortho : the one-, two- and three-electron
+  ! components are computed and discarded, only htot is returned.
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: htot
+  double precision              :: hmono, htwoe, hthree
+
+  call htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+
+end
+subroutine htilde_mu_mat_opt_bi_ortho(key_j, key_i, Nint, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and
+  ! |key_i> is developed on the RIGHT basis.
+  !
+  ! The element is returned split into its one- (hmono), two- (htwoe) and
+  ! three-electron (hthree) parts, together with the total (htot).
+  !
+  ! The work is dispatched on the excitation degree between the two determinants :
+  !   0 -> diagonal routine, and nuclear_repulsion is added to htot
+  !   1 -> single-excitation routine
+  !   2 -> double-excitation routine
+  ! For any other degree all outputs are 0.
+  !
+  ! WARNING : non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree 
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.D0
+  htot   = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  if(degree == 0)then
+    call diag_htilde_mu_mat_fock_bi_ortho  (Nint, key_i, hmono, htwoe, hthree, htot)
+    ! the nuclear repulsion only enters the diagonal
+    htot += nuclear_repulsion
+  else if(degree == 1)then
+    call single_htilde_mu_mat_fock_bi_ortho(Nint,key_j, key_i , hmono, htwoe, hthree, htot)
+  else if(degree == 2)then
+    call double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+  endif
+ 
+end
+
+! ---
+
+subroutine htilde_mu_mat_opt_bi_ortho_no_3e(key_j, key_i, Nint, htot)
+
+  BEGIN_DOC
+  !
+  ! <key_j | H_tilde | key_i> where |key_j> is developed on the LEFT basis and
+  ! |key_i> is developed on the RIGHT basis.
+  !
+  ! Only the total is returned, and it is built from the "_no_3e" routines, i.e.
+  ! WITHOUT ANY CONTRIBUTION FROM THE THREE ELECTRON TERMS.
+  !
+  ! The work is dispatched on the excitation degree between the two determinants :
+  !   0 -> diagonal routine, and nuclear_repulsion is added to htot
+  !   1 -> single-excitation routine
+  !   2 -> double-excitation routine
+  ! For any other degree htot is 0.
+  !
+  ! WARNING : non hermitian !!
+  !
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_i(Nint,2), key_j(Nint,2)
+  double precision, intent(out) :: htot
+  integer                       :: degree 
+
+  htot   = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  if(degree == 0)then
+    call diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_i,htot)
+    ! the nuclear repulsion only enters the diagonal
+    htot += nuclear_repulsion
+  else if(degree == 1)then
+    call single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint,key_j, key_i , htot)
+  else if(degree == 2)then
+    call double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  endif
+ 
+end
+
+! ---
--- a/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_diag.irp.f
@ -0,0 +1,473 @@
+ BEGIN_PROVIDER [ double precision, ref_tc_energy_tot]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_1e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_2e]
+&BEGIN_PROVIDER [ double precision, ref_tc_energy_3e]
+ implicit none
+ BEGIN_DOC
+! One-, two- and three-electron components of the TC energy of the Slater
+! determinant HF_bitmask, and their sum.
+!
+! The three-electron component is 0 when three_body_h_tc is false.
+! The total returned by diag_htilde_mu_mat_bi_ortho is not used : ref_tc_energy_tot
+! is rebuilt from the three components.
+ END_DOC 
+ double precision :: e_one, e_two, e_unused, e_three
+
+ call diag_htilde_mu_mat_bi_ortho(N_int, HF_bitmask, e_one, e_two, e_unused)
+ ref_tc_energy_1e = e_one
+ ref_tc_energy_2e = e_two
+
+ ref_tc_energy_3e = 0.d0
+ if(three_body_h_tc)then
+  call diag_htilde_three_body_ints_bi_ort(N_int, HF_bitmask, e_three)
+  ref_tc_energy_3e = e_three
+ endif
+
+ ref_tc_energy_tot = ref_tc_energy_1e + ref_tc_energy_2e + ref_tc_energy_3e
+ END_PROVIDER 
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho(Nint, det_in, hmono, htwoe, hthree, htot)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$ for the determinant det_in, split into its
+  ! one- (hmono), two- (htwoe) and three-electron (hthree) parts; htot is their sum.
+  !
+  ! The energy is obtained incrementally from the reference values ref_tc_energy_* :
+  ! det_in is compared to ref_bitmask, and for each spin every particle is added
+  ! (ac_tc_operator) and every hole is removed (a_tc_operator) from a working copy of
+  ! ref_bitmask, each call updating hmono, htwoe and hthree.
+  !
+  ! NOTE(review): the reference energies are computed for HF_bitmask whereas the
+  ! holes/particles are taken with respect to ref_bitmask - presumably the two
+  ! coincide; to be confirmed.
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: hmono,htwoe,htot,hthree
+
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+  integer                        :: tmp(2)
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+
+  ! holes    : orbitals occupied in ref_bitmask but not in det_in
+  ! particles: orbitals occupied in det_in but not in ref_bitmask
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  ! det_in is the reference itself : return the stored reference energies
+  if (nexc(1)+nexc(2) == 0) then
+    hmono = ref_tc_energy_1e
+    htwoe = ref_tc_energy_2e
+    hthree= ref_tc_energy_3e
+    htot = ref_tc_energy_tot
+    return
+  endif
+
+  !call debug_det(det_in,Nint)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  
+  det_tmp = ref_bitmask
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e 
+  hthree= ref_tc_energy_3e
+  do ispin=1,2
+    na = elec_num_tab(ispin)             ! electrons of spin ispin
+    nb = elec_num_tab(iand(ispin,1)+1)   ! electrons of the other spin
+    do i=1,nexc(ispin)
+      ! add the i-th particle, then remove the i-th hole
+      !DIR$ FORCEINLINE
+      call ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe,hthree, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe+hthree
+end
+
+subroutine ac_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! ADDITION of an electron in the orbital 'iorb' of spin 'ispin' to the
+  ! determinant 'key'.
+  !
+  ! On output :
+  !   * the bit of iorb is set in key(:,ispin)
+  !   * hmono, htwoe and hthree are INCREMENTED by the interaction of the new
+  !     electron with the electrons that were in key on input (hthree only when
+  !     three_body_h_tc is true)
+  !   * na is increased by one
+  !
+  ! na (nb) is used as the number of electrons of spin ispin (of the other spin)
+  ! in key on input. nb is not modified.
+  !
+  ! The program stops if iorb is outside [1, mo_num].
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: tmp(2)
+  integer                        :: opp_spin
+  integer                        :: k,l,p,q,orb_p,orb_q
+  double precision               :: direct_int, exchange_int
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! occupied orbitals BEFORE the new electron is added
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  ! word k and bit l of the orbital iorb
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  opp_spin = iand(ispin,1)+1
+
+  ! one-electron part
+  hmono += mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! two-electron part : same spin
+  do p = 1, na
+    htwoe += mo_bi_ortho_tc_two_e_jj_anti(occ(p,ispin),iorb)
+  enddo
+
+  ! two-electron part : opposite spin
+  do p = 1, nb
+    htwoe += mo_bi_ortho_tc_two_e_jj(occ(p,opp_spin),iorb)
+  enddo
+
+  if(three_body_h_tc)then
+   ! three-electron part : both spectators have the spin of iorb
+   do p = 1, na
+    orb_p = occ(p,ispin)
+    do q = p+1, na
+     orb_q = occ(q,ispin)
+     hthree += three_e_diag_parrallel_spin_prov(orb_q,orb_p,iorb)
+    enddo
+   enddo
+   ! three-electron part : one spectator of each spin
+   do p = 1, na
+    orb_p = occ(p,ispin)
+    do q = 1, nb
+     orb_q = occ(q,opp_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     hthree += direct_int - exchange_int
+    enddo
+   enddo
+   ! three-electron part : both spectators have the opposite spin
+   do p = 1, nb
+    orb_p = occ(p,opp_spin)
+    do q = p+1, nb
+     orb_q = occ(q,opp_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch23_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     hthree += direct_int - exchange_int
+    enddo
+   enddo
+  endif
+
+  na = na+1
+end
+
+subroutine a_tc_operator(iorb,ispin,key,hmono,htwoe,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! REMOVAL of the electron in the orbital 'iorb' of spin 'ispin' from the
+  ! determinant 'key'.
+  !
+  ! On output :
+  !   * the bit of iorb is cleared in key(:,ispin)
+  !   * na is decreased by one
+  !   * hmono, htwoe and hthree are DECREASED by the interaction of the removed
+  !     electron with the electrons remaining in key (hthree only when
+  !     three_body_h_tc is true)
+  !
+  ! After the update, na (nb) is used as the number of remaining electrons of spin
+  ! ispin (of the other spin). nb is not modified.
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe,hthree
+  
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: tmp(2)
+  integer                        :: opp_spin
+  integer                        :: k,l,p,q,orb_p,orb_q
+  double precision               :: direct_int, exchange_int
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! word k and bit l of the orbital iorb
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  opp_spin = iand(ispin,1)+1
+
+  ! occupied orbitals AFTER the electron has been removed
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  ! one-electron part
+  hmono -= mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! two-electron part : same spin
+  do p = 1, na
+    htwoe -= mo_bi_ortho_tc_two_e_jj_anti(occ(p,ispin),iorb)
+  enddo
+
+  ! two-electron part : opposite spin
+  do p = 1, nb
+    htwoe -= mo_bi_ortho_tc_two_e_jj(occ(p,opp_spin),iorb)
+  enddo
+
+  if(three_body_h_tc)then
+   ! three-electron part : both spectators have the spin of iorb
+   do p = 1, na
+    orb_p = occ(p,ispin)
+    do q = p+1, na
+     orb_q = occ(q,ispin)
+     hthree -= three_e_diag_parrallel_spin_prov(orb_q,orb_p,iorb)
+    enddo
+   enddo
+   ! three-electron part : one spectator of each spin
+   do p = 1, na
+    orb_p = occ(p,ispin)
+    do q = 1, nb
+     orb_q = occ(q,opp_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch12_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     hthree -= (direct_int - exchange_int)
+    enddo
+   enddo
+   ! three-electron part : both spectators have the opposite spin
+   do p = 1, nb
+    orb_p = occ(p,opp_spin)
+    do q = p+1, nb
+     orb_q = occ(q,opp_spin)
+     direct_int   = three_e_3_idx_direct_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     exchange_int = three_e_3_idx_exch23_bi_ort(orb_q,orb_p,iorb) ! USES 3-IDX TENSOR 
+     hthree -= (direct_int - exchange_int)
+    enddo
+   enddo
+  endif
+
+end
+
+
+subroutine diag_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, det_in,htot)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $\langle i|H|i \rangle$. WITHOUT ANY CONTRIBUTIONS FROM 3E TERMS
+  !
+  ! htot is the sum of the one- and two-electron parts only, obtained incrementally
+  ! from ref_tc_energy_1e and ref_tc_energy_2e : det_in is compared to ref_bitmask,
+  ! and for each spin every particle is added (ac_tc_operator_no_3e) and every hole
+  ! is removed (a_tc_operator_no_3e) from a working copy of ref_bitmask.
+  !
+  ! When det_in is the reference itself, htot = ref_tc_energy_1e + ref_tc_energy_2e
+  ! (and not ref_tc_energy_tot, which also contains ref_tc_energy_3e).
+  END_DOC
+  integer,intent(in)             :: Nint
+  integer(bit_kind),intent(in)   :: det_in(Nint,2)
+  double precision, intent(out)  :: htot
+  double precision :: hmono,htwoe
+
+  integer(bit_kind)              :: hole(Nint,2)
+  integer(bit_kind)              :: particle(Nint,2)
+  integer                        :: i, nexc(2), ispin
+  integer                        :: occ_particle(Nint*bit_kind_size,2)
+  integer                        :: occ_hole(Nint*bit_kind_size,2)
+  integer(bit_kind)              :: det_tmp(Nint,2)
+  integer                        :: na, nb
+  integer                        :: tmp(2)
+
+  ASSERT (Nint > 0)
+  ASSERT (sum(popcnt(det_in(:,1))) == elec_alpha_num)
+  ASSERT (sum(popcnt(det_in(:,2))) == elec_beta_num)
+
+
+  ! holes    : orbitals occupied in ref_bitmask but not in det_in
+  ! particles: orbitals occupied in det_in but not in ref_bitmask
+  nexc(1) = 0
+  nexc(2) = 0
+  do i=1,Nint
+    hole(i,1)     = xor(det_in(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(det_in(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),det_in(i,1))
+    particle(i,2) = iand(hole(i,2),det_in(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+
+  ! det_in is the reference itself : only the 1e and 2e reference energies are
+  ! returned, consistently with the general case below
+  if (nexc(1)+nexc(2) == 0) then
+    htot = ref_tc_energy_1e + ref_tc_energy_2e
+    return
+  endif
+
+  !call debug_det(det_in,Nint)
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, Nint)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  
+  det_tmp = ref_bitmask
+  hmono = ref_tc_energy_1e
+  htwoe = ref_tc_energy_2e 
+  do ispin=1,2
+    na = elec_num_tab(ispin)             ! electrons of spin ispin
+    nb = elec_num_tab(iand(ispin,1)+1)   ! electrons of the other spin
+    do i=1,nexc(ispin)
+      ! add the i-th particle, then remove the i-th hole
+      !DIR$ FORCEINLINE
+      call ac_tc_operator_no_3e( occ_particle(i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+      !DIR$ FORCEINLINE
+      call a_tc_operator_no_3e ( occ_hole    (i,ispin), ispin, det_tmp, hmono,htwoe, Nint,na,nb)
+    enddo
+  enddo
+  htot = hmono+htwoe
+end
+
+subroutine ac_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! ADDITION of an electron in the orbital 'iorb' of spin 'ispin' to the
+  ! determinant 'key', one- and two-body energies only.
+  !
+  ! On output :
+  !   * the bit of iorb is set in key(:,ispin)
+  !   * hmono and htwoe are INCREMENTED by the interaction of the new electron
+  !     with the electrons that were in key on input
+  !   * na is increased by one
+  !
+  ! na (nb) is used as the number of electrons of spin ispin (of the other spin)
+  ! in key on input. nb is not modified.
+  !
+  ! The program stops if iorb is outside [1, mo_num].
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: tmp(2)
+  integer                        :: opp_spin
+  integer                        :: k,l,p
+
+  if (iorb < 1) then
+    print *,  irp_here, ': iorb < 1'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+  if (iorb > mo_num) then
+    print *,  irp_here, ': iorb > mo_num'
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! occupied orbitals BEFORE the new electron is added
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  ASSERT (tmp(1) == elec_alpha_num)
+  ASSERT (tmp(2) == elec_beta_num)
+
+  ! word k and bit l of the orbital iorb
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k >0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  ASSERT (l >= 0)
+  key(k,ispin) = ibset(key(k,ispin),l)
+  opp_spin = iand(ispin,1)+1
+
+  ! one-electron part
+  hmono += mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! two-electron part : same spin
+  do p = 1, na
+    htwoe += mo_bi_ortho_tc_two_e_jj_anti(occ(p,ispin),iorb)
+  enddo
+
+  ! two-electron part : opposite spin
+  do p = 1, nb
+    htwoe += mo_bi_ortho_tc_two_e_jj(occ(p,opp_spin),iorb)
+  enddo
+
+  na = na+1
+end
+
+subroutine a_tc_operator_no_3e(iorb,ispin,key,hmono,htwoe,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! REMOVAL of the electron in the orbital 'iorb' of spin 'ispin' from the
+  ! determinant 'key', one- and two-body energies only.
+  !
+  ! On output :
+  !   * the bit of iorb is cleared in key(:,ispin)
+  !   * na is decreased by one
+  !   * hmono and htwoe are DECREASED by the interaction of the removed electron
+  !     with the electrons remaining in key
+  !
+  ! After the update, na (nb) is used as the number of remaining electrons of spin
+  ! ispin (of the other spin). nb is not modified.
+  END_DOC
+  integer, intent(in)            :: iorb, ispin, Nint
+  integer, intent(inout)         :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout) :: hmono,htwoe
+  
+  integer                        :: occ(Nint*bit_kind_size,2)
+  integer                        :: tmp(2)
+  integer                        :: opp_spin
+  integer                        :: k,l,p
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! word k and bit l of the orbital iorb
+  k = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (k>0)
+  l = iorb - shiftl(k-1,bit_kind_shift)-1
+  key(k,ispin) = ibclr(key(k,ispin),l)
+  opp_spin = iand(ispin,1)+1
+
+  ! occupied orbitals AFTER the electron has been removed
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, tmp, Nint)
+  na = na-1
+
+  ! one-electron part
+  hmono -= mo_bi_ortho_tc_one_e(iorb,iorb)
+
+  ! two-electron part : same spin
+  do p = 1, na
+    htwoe -= mo_bi_ortho_tc_two_e_jj_anti(occ(p,ispin),iorb)
+  enddo
+
+  ! two-electron part : opposite spin
+  do p = 1, nb
+    htwoe -= mo_bi_ortho_tc_two_e_jj(occ(p,opp_spin),iorb)
+  enddo
+
+end
+
--- a/src/tc_bi_ortho/slater_tc_opt_double.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_double.irp.f
@ -0,0 +1,476 @@
+
+subroutine double_htilde_mu_mat_fock_bi_ortho(Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for a double excitation
+  !
+  ! hmono  : always set to 0.d0 by this routine
+  !
+  ! htwoe  : two-body term. If three_body_h_tc and double_normal_ord are both true,
+  !          the normal_two_body_bi_orth contribution is added to it
+  !
+  ! hthree : three-body term obtained from three_comp_two_e_elem, computed only if
+  !          three_body_h_tc is true and double_normal_ord is false (0.d0 otherwise)
+  !
+  ! htot   : htwoe + hthree
+  !
+  ! All outputs are 0.d0 if key_i and key_j do not differ by a double excitation.
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree, degree_i, degree_j, exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+
+  if(degree.ne.2)then
+   return
+  endif
+
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  ! Pure two-body part
+  ! direct term, common to opposite-spin and same-spin excitations
+  htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)
+  if(s1.eq.s2)then
+   ! same spin : exchange term
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1)
+  endif
+
+  if(three_body_h_tc)then
+   if(.not.double_normal_ord)then
+    ! Explicit three-body term, evaluated on top of the determinant that has the
+    ! lowest excitation degree with respect to ref_bitmask (key_i in case of a tie).
+    ! The two degrees are only needed here, so they are computed only in this branch.
+    call get_excitation_degree(ref_bitmask,key_i,degree_i,N_int)
+    call get_excitation_degree(ref_bitmask,key_j,degree_j,N_int)
+    if(degree_i>degree_j)then
+     call three_comp_two_e_elem(key_j,h1,h2,p1,p2,s1,s2,hthree)
+    else
+     call three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+    endif
+   elseif(elec_num+elec_num.gt.2)then
+    ! Normal-ordered three-body term folded into the two-body part.
+    ! NOTE(review): the test above is equivalent to elec_num > 1 and the index ordering
+    ! passed to normal_two_body_bi_orth differs from the one of mo_bi_ortho_tc_two_e ;
+    ! both were flagged as unverified by the original author -- to be confirmed.
+    if(s1.ne.s2)then
+     htwoe += normal_two_body_bi_orth(p2,h2,p1,h1)
+    else
+     htwoe -= normal_two_body_bi_orth(h2,p1,h1,p2)
+     htwoe += normal_two_body_bi_orth(h1,p1,h2,p2)
+    endif
+   endif
+  endif
+
+  hthree *= phase
+  htwoe  *= phase
+  htot    =  htwoe + hthree
+
+end
+
+
+
+subroutine three_comp_two_e_elem(key_i,h1,h2,p1,p2,s1,s2,hthree)
+ ! Three-electron contribution to the double excitation (h1 -> p1) of spin s1,
+ ! (h2 -> p2) of spin s2, evaluated on top of the determinant key_i.
+ !
+ ! hthree is first set to the value tabulated for the reference determinant
+ ! (eff_2_e_from_3_e_aa / _bb / _ab depending on s1 and s2). It is then corrected
+ ! for every orbital in which key_i differs from ref_bitmask : the contribution of
+ ! each particle of key_i is added and the contribution of each hole is subtracted.
+ !
+ ! The phase of the excitation is NOT applied here.
+ !
+ ! NOTE(review): na, nb, n_occ_ab_hole, n_occ_ab_particle and det_tmp are declared
+ ! but never used in this routine.
+ implicit none
+ integer(bit_kind), intent(in) :: key_i(N_int,2)
+ integer, intent(in) :: h1,h2,p1,p2,s1,s2
+ double precision, intent(out) :: hthree
+ integer :: nexc(2),i,ispin,na,nb
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
+ integer(bit_kind)              :: det_tmp(N_int,2)
+ integer :: ipart, ihole
+ double precision :: direct_int, exchange_int
+
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the ROHF determinant
+  !! hole     = orbitals set in ref_bitmask but not in key_i
+  !! particle = orbitals set in key_i but not in ref_bitmask
+  !! nexc(s)  = number of holes of spin s
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1))
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+  integer                        :: tmp(2)
+  ! Lists of orbital indices of the particles, then of the holes.
+  ! The loops below run up to nexc(ispin) on both lists, i.e. they rely on having
+  ! as many particles as holes for each spin (checked by the ASSERTs).
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+  if(s1==s2.and.s1==1)then
+   !!!!!!!!!!!!!!!!!!!!!!!!!! alpha/alpha double exc
+   hthree = eff_2_e_from_3_e_aa(p2,p1,h2,h1) 
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+    !!!!!!!! the matrix element is already exact 
+    !!!!!!!! else you need to take care of holes and particles 
+    !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+    ispin = 1 ! i==alpha ==> pure same spin terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ipart=occ_particle(i,ispin)
+     hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
+     ihole=occ_hole(i,ispin)
+     hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
+    enddo
+    ispin = 2 ! i==beta ==> alpha/alpha/beta terms
+    do i = 1, nexc(ispin) ! number of couple of holes/particles 
+     ! exchange between (h1,p1) and (h2,p2)
+     ipart=occ_particle(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+     hthree += direct_int - exchange_int
+     ihole=occ_hole(i,ispin)
+     direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+     exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+     hthree -= direct_int - exchange_int
+    enddo
+   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  elseif(s1==s2.and.s1==2)then 
+   !!!!!!!!!!!!!!!!!!!!!!!!!! beta/beta double exc
+   hthree = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   !!!!!!!! the matrix element is already exact 
+   !!!!!!!! else you need to take care of holes and particles 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 2 ! i==beta  ==> pure same spin terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ipart=occ_particle(i,ispin)
+    hthree += three_e_double_parrallel_spin_prov(ipart,p2,h2,p1,h1)
+    ihole=occ_hole(i,ispin)
+    hthree -= three_e_double_parrallel_spin_prov(ihole,p2,h2,p1,h1)
+   enddo
+   ispin = 1 ! i==alpha==> beta/beta/alpha terms
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h1,p1) and (h2,p2)
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch12_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  else                         ! (h1,p1) == alpha/(h2,p2) == beta 
+   hthree = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+   if(nexc(1)+nexc(2) ==0)return !! if you're on the reference determinant 
+   !!!!!!!! the matrix element is already exact 
+   !!!!!!!! else you need to take care of holes and particles 
+   !!!!!!!!!!!!! Holes and particles !!!!!!!!!!!!!!!!!!!!!!!
+   ispin = 1 ! i==alpha ==> alpha/beta/alpha terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h1,p1) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch13_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+   ispin = 2 ! i==beta  ==> alpha/beta/beta  terms 
+   do i = 1, nexc(ispin) ! number of couple of holes/particles 
+    ! exchange between (h2,p2) and i
+    ipart=occ_particle(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ipart,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ipart,p2,h2,p1,h1)
+    hthree += direct_int - exchange_int
+    ihole=occ_hole(i,ispin)
+    direct_int  = three_e_5_idx_direct_bi_ort(ihole,p2,h2,p1,h1)
+    exchange_int = three_e_5_idx_exch23_bi_ort(ihole,p2,h2,p1,h1)
+    hthree -= direct_int - exchange_int
+   enddo
+  endif
+end
+
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_ab, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_ab(p2,p1,h2,h1) = Effective two-electron operator for alpha/beta double excitations,
+!
+! built by give_contrib_for_abab from the orbitals occupied in ref_bitmask.
+!
+! Operator string : a^{dagger}_p1_alpha a^{dagger}_p2_beta a_h2_beta a_h1_alpha
+!
+! Only the entries whose four indices belong to list_act are computed, the others are 0.d0
+ END_DOC
+ integer                        :: idx_h1, idx_h2, idx_p1, idx_p2
+ integer                        :: h1, h2, p1, p2
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision               :: contrib
+
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+
+ ! One call performed before the OpenMP region, its result is discarded.
+ ! NOTE(review): presumably there to trigger, outside of the parallel region, the
+ ! providers used inside give_contrib_for_abab -- to be confirmed.
+ call give_contrib_for_abab(1,1,1,1,occ,Ne,contrib)
+
+ eff_2_e_from_3_e_ab = 0.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (idx_h1, h1, idx_h2, h2, idx_p1, p1, idx_p2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_ab)
+ !$OMP DO SCHEDULE (static) 
+  do idx_h1 = 1, n_act_orb           ! alpha hole
+    h1 = list_act(idx_h1)
+    do idx_h2 = 1, n_act_orb         ! beta hole
+      h2 = list_act(idx_h2)
+      do idx_p1 = 1, n_act_orb       ! alpha particle
+        p1 = list_act(idx_p1)
+        do idx_p2 = 1, n_act_orb     ! beta particle
+          p2 = list_act(idx_p2)
+          call give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_ab(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ deallocate( occ )
+
+END_PROVIDER 
+
+subroutine give_contrib_for_abab(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals are in (occ, Ne)
+!
+! occ(i,s) : index of the i-th occupied orbital of spin s (1 = alpha, 2 = beta)
+!
+! Ne(s)    : number of occupied orbitals of spin s
+!
+! contrib  : sum over the occupied orbitals m of (direct - exchange) three-electron integrals
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == alpha 
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha 
+   ! the integrals are indexed by the ORBITAL m, not by its rank mm in the occupation list
+   m = occ(mm,1)
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and m
+   exchange_int = three_e_5_idx_exch13_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+
+ do mm = 1, Ne(2) !! beta
+   m = occ(mm,2)
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h2,p2) and m
+   exchange_int = three_e_5_idx_exch23_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_aa, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_aa(p2,p1,h2,h1) = Effective two-electron operator for alpha/alpha double excitations,
+!
+! built by give_contrib_for_aaaa from the orbitals occupied in ref_bitmask.
+!
+! Operator string : a^{dagger}_p1_alpha a^{dagger}_p2_alpha a_h2_alpha a_h1_alpha
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+!
+! Only the entries with h2 after h1 and p2 after p1 in list_act are computed,
+!
+! all the other entries keep the value 100000000.d0
+ END_DOC
+ integer                        :: idx_h1, idx_h2, idx_p1, idx_p2
+ integer                        :: h1, h2, p1, p2
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision               :: contrib
+
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+
+ ! One call performed before the OpenMP region, its result is discarded.
+ ! NOTE(review): presumably there to trigger, outside of the parallel region, the
+ ! providers used inside give_contrib_for_aaaa -- to be confirmed.
+ call give_contrib_for_aaaa(1 ,1 ,1 ,1 ,occ,Ne,contrib)
+
+ eff_2_e_from_3_e_aa = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (idx_h1, h1, idx_h2, h2, idx_p1, p1, idx_p2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_aa)
+ !$OMP DO SCHEDULE (static) 
+  do idx_h1 = 1, n_act_orb                 ! first alpha hole
+    h1 = list_act(idx_h1)
+    do idx_h2 = idx_h1+1, n_act_orb        ! second alpha hole, after the first one
+      h2 = list_act(idx_h2)
+      do idx_p1 = 1, n_act_orb             ! first alpha particle
+        p1 = list_act(idx_p1)
+        do idx_p2 = idx_p1+1, n_act_orb    ! second alpha particle, after the first one
+          p2 = list_act(idx_p2)
+          call give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_aa(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ deallocate( occ )
+
+END_PROVIDER 
+
+subroutine give_contrib_for_aaaa(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_alpha (h2,p2)_alpha
+!
+! on top of a determinant whose occupied orbitals are in (occ, Ne)
+!
+! occ(i,s) : index of the i-th occupied orbital of spin s (1 = alpha, 2 = beta)
+!
+! Ne(s)    : number of occupied orbitals of spin s
+!
+! contrib  : sum over the occupied alpha orbitals of the pure parallel-spin term
+!
+!            + sum over the occupied beta orbitals of (direct - exchange) integrals
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == alpha 
+ !! h2,p2 == alpha
+ contrib = 0.d0
+ do mm = 1, Ne(1) !! alpha ==> pure parallel spin contribution
+   m = occ(mm,1)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
+ enddo
+
+ do mm = 1, Ne(2) !! beta
+   ! the integrals are indexed by the ORBITAL m, not by its rank mm in the occupation list
+   m = occ(mm,2)
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+
+BEGIN_PROVIDER [ double precision, eff_2_e_from_3_e_bb, (mo_num, mo_num, mo_num, mo_num)]
+ implicit none
+ BEGIN_DOC
+! eff_2_e_from_3_e_bb(p2,p1,h2,h1) = Effective two-electron operator for beta/beta double excitations,
+!
+! built by give_contrib_for_bbbb from the orbitals occupied in ref_bitmask.
+!
+! Operator string : a^{dagger}_p1_beta a^{dagger}_p2_beta a_h2_beta a_h1_beta
+!
+! WARNING :: to be coherent with the phase convention used in the Hamiltonian matrix elements, you must fulfill 
+!
+! ||||    h2>h1, p2>p1   ||||
+!
+! Only the entries with h2 after h1 and p2 after p1 in list_act are computed,
+!
+! all the other entries keep the value 100000000.d0
+ END_DOC
+ integer                        :: idx_h1, idx_h2, idx_p1, idx_p2
+ integer                        :: h1, h2, p1, p2
+ integer                        :: Ne(2)
+ integer,           allocatable :: occ(:,:)
+ double precision               :: contrib
+
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask,occ,Ne,N_int)
+
+ ! One call performed before the OpenMP region, its result is discarded.
+ ! NOTE(review): presumably there to trigger, outside of the parallel region, the
+ ! providers used inside give_contrib_for_bbbb -- to be confirmed.
+ call give_contrib_for_bbbb(1,1 ,1 ,1 ,occ,Ne,contrib)
+
+ eff_2_e_from_3_e_bb = 100000000.d0
+ !$OMP PARALLEL                                                                         &
+ !$OMP DEFAULT (NONE)                                                                   &
+ !$OMP PRIVATE (idx_h1, h1, idx_h2, h2, idx_p1, p1, idx_p2, p2, contrib) & 
+ !$OMP SHARED (n_act_orb, list_act, Ne,occ, eff_2_e_from_3_e_bb)
+ !$OMP DO SCHEDULE (static) 
+  do idx_h1 = 1, n_act_orb                 ! first beta hole
+    h1 = list_act(idx_h1)
+    do idx_h2 = idx_h1+1, n_act_orb        ! second beta hole, after the first one
+      h2 = list_act(idx_h2)
+      do idx_p1 = 1, n_act_orb             ! first beta particle
+        p1 = list_act(idx_p1)
+        do idx_p2 = idx_p1+1, n_act_orb    ! second beta particle, after the first one
+          p2 = list_act(idx_p2)
+          call give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
+          eff_2_e_from_3_e_bb(p2,p1,h2,h1) = contrib
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ deallocate( occ )
+
+END_PROVIDER 
+
+subroutine give_contrib_for_bbbb(h1,h2,p1,p2,occ,Ne,contrib)
+ implicit none
+ BEGIN_DOC 
+! gives the contribution for a double excitation (h1,p1)_beta (h2,p2)_beta
+!
+! on top of a determinant whose occupied orbitals are in (occ, Ne)
+!
+! occ(i,s) : index of the i-th occupied orbital of spin s (1 = alpha, 2 = beta)
+!
+! Ne(s)    : number of occupied orbitals of spin s
+!
+! contrib  : sum over the occupied beta orbitals of the pure parallel-spin term
+!
+!            + sum over the occupied alpha orbitals of (direct - exchange) integrals
+ END_DOC
+ integer, intent(in) :: h1,h2,p1,p2,occ(N_int*bit_kind_size,2),Ne(2)
+ double precision, intent(out) :: contrib
+ integer :: mm,m 
+ double precision :: direct_int, exchange_int
+ !! h1,p1 == beta
+ !! h2,p2 == beta
+ contrib = 0.d0
+ do mm = 1, Ne(2) !! beta ==> pure parallel spin contribution
+   ! the beta electrons must be read from the beta column of occ
+   m = occ(mm,2)
+   contrib += three_e_double_parrallel_spin_prov(m,p2,h2,p1,h1)
+ enddo
+
+ do mm = 1, Ne(1) !! alpha
+   ! the integrals are indexed by the ORBITAL m, not by its rank mm in the occupation list
+   m = occ(mm,1)
+   direct_int   = three_e_5_idx_direct_bi_ort(m,p2,h2,p1,h1) 
+   ! exchange between (h1,p1) and (h2,p2)
+   exchange_int = three_e_5_idx_exch12_bi_ort(m,p2,h2,p1,h1)
+   contrib += direct_int - exchange_int
+ enddo
+end
+
+
+subroutine double_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for double excitation  ONLY FOR ONE- AND TWO-BODY TERMS 
+  !
+  ! htot is the two-body matrix element multiplied by the phase of the excitation
+  !
+  ! (a double excitation has no one-body contribution).
+  !
+  ! htot = 0.d0 if key_i and key_j do not differ by a double excitation.
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint 
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: htot
+  double precision              :: htwoe, phase
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+
+  htot = 0.d0
+
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.2)then
+   return
+  endif
+
+  call get_double_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc, 2, h1, p1, h2, p2, s1, s2)
+
+  ! direct term, common to opposite-spin and same-spin excitations
+  htwoe  = mo_bi_ortho_tc_two_e(p2,p1,h2,h1)
+  if(s1.eq.s2)then
+   ! same spin : exchange term
+   htwoe -= mo_bi_ortho_tc_two_e(p1,p2,h2,h1)
+  endif
+  htwoe  *= phase
+  htot    =  htwoe
+
+end
+
--- a/src/tc_bi_ortho/slater_tc_opt_single.irp.f
+++ b/src/tc_bi_ortho/slater_tc_opt_single.irp.f
@ -0,0 +1,572 @@
+
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho (Nint, key_j, key_i, hmono, htwoe, hthree, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for a single excitation
+  !
+  ! The excitation (hole, particle, spin, phase) going from key_i to key_j is decoded here,
+  !
+  ! hmono, htwoe, hthree and htot are then computed by get_single_excitation_from_fock_tc.
+  !
+  ! All outputs are 0.d0 if key_i and key_j do not differ by a single excitation.
+  !!
+  !! WARNING !!
+  ! 
+  ! Non hermitian !!
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in) :: Nint
+  integer(bit_kind), intent(in) :: key_j(Nint,2), key_i(Nint,2)
+  double precision, intent(out) :: hmono, htwoe, hthree, htot
+  integer                       :: degree,exc(0:2,2,2)
+  integer                       :: h1, p1, h2, p2, s1, s2
+  double precision              :: phase
+
+  hmono  = 0.d0
+  htwoe  = 0.d0
+  hthree = 0.d0
+  htot   = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  ! only h1, p1 and s1 are meaningful for a single excitation
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  call get_single_excitation_from_fock_tc(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,hthree,htot)
+end
+
+
+subroutine get_single_excitation_from_fock_tc(key_i,key_j,h,p,spin,phase,hmono,htwoe,hthree,htot)
+ use bitmasks
+ implicit none
+ BEGIN_DOC
+! Matrix element of the single excitation h -> p of spin 'spin' applied on key_i.
+!
+! hmono  : mo_bi_ortho_tc_one_e(p,h)
+!
+! htwoe  : fock_op_2_e_tc_closed_shell(p,h), corrected by the holes and particles of key_i
+!
+!          with respect to ref_closed_shell_bitmask
+!
+! hthree : three-body term from three_comp_fock_elem if three_body_h_tc is true, 0.d0 otherwise
+!
+! htot   : htwoe + hmono + hthree
+!
+! All the outputs are multiplied by 'phase'. key_j is not used.
+ END_DOC
+ integer,intent(in) :: h,p,spin
+ double precision, intent(in)  :: phase
+ integer(bit_kind), intent(in) :: key_i(N_int,2), key_j(N_int,2)
+ double precision, intent(out) :: hmono,htwoe,hthree,htot
+ integer(bit_kind) :: diff_bits(N_int,2)
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: partcl(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_partcl(N_int*bit_kind_size,2)
+ integer :: n_hole(2), n_partcl(2)
+ integer :: idx, i_word, i_sp
+ double precision :: buffer_c(mo_num),buffer_x(mo_num)
+
+ ! Local copies of the integrals needed for this (p,h) pair
+ buffer_c(1:mo_num) = tc_2e_3idx_coulomb_integrals(1:mo_num,p,h)
+ buffer_x(1:mo_num) = tc_2e_3idx_exchange_integrals(1:mo_num,p,h)
+
+ ! Holes and particles of key_i with respect to the closed-shell reference
+ do i_sp = 1, 2
+  do i_word = 1, N_int
+   diff_bits(i_word,i_sp) = xor(key_i(i_word,i_sp),ref_closed_shell_bitmask(i_word,i_sp))
+   hole(i_word,i_sp)      = iand(diff_bits(i_word,i_sp),ref_closed_shell_bitmask(i_word,i_sp))
+   partcl(i_word,i_sp)    = iand(diff_bits(i_word,i_sp),key_i(i_word,i_sp))
+  enddo
+ enddo
+ call bitstring_to_list_ab(hole, occ_hole, n_hole, N_int)
+ call bitstring_to_list_ab(partcl, occ_partcl, n_partcl, N_int)
+
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)
+
+ ! holes :: direct terms, alpha then beta
+ do i_sp = 1, 2
+  do idx = 1, n_hole(i_sp)
+   htwoe -= buffer_c(occ_hole(idx,i_sp))
+  enddo
+ enddo
+
+ ! holes :: exchange terms, only for the spin of the excitation
+ do idx = 1, n_hole(spin)
+  htwoe += buffer_x(occ_hole(idx,spin))
+ enddo
+
+ ! particles :: direct terms, alpha then beta
+ do i_sp = 1, 2
+  do idx = 1, n_partcl(i_sp)
+   htwoe += buffer_c(occ_partcl(idx,i_sp))
+  enddo
+ enddo
+
+ ! particles :: exchange terms, only for the spin of the excitation
+ do idx = 1, n_partcl(spin)
+  htwoe -= buffer_x(occ_partcl(idx,spin))
+ enddo
+
+ hthree = 0.d0
+ if (three_body_h_tc)then
+  call three_comp_fock_elem(key_i,h,p,spin,hthree)
+ endif
+
+ htwoe  *= phase
+ hmono  *= phase
+ hthree *= phase
+ htot  = htwoe + hmono + hthree
+
+end
+
+subroutine three_comp_fock_elem(key_i,h_fock,p_fock,ispin_fock,hthree)
+ ! Three-electron contribution to the single excitation h_fock -> p_fock of spin
+ ! ispin_fock, evaluated on top of the determinant key_i.
+ !
+ ! hthree starts from the value tabulated for the reference determinant
+ ! (fock_a_tot_3e_bi_orth or fock_b_tot_3e_bi_orth depending on ispin_fock).
+ ! A working copy of ref_bitmask is then transformed step by step into key_i :
+ ! for each spin and each hole/particle pair, fock_ac_tc_operator adds the particle
+ ! and fock_a_tc_operator removes the hole, both of them updating hthree.
+ !
+ ! The phase of the excitation is NOT applied here.
+ !
+ ! NOTE(review): n_occ_ab_hole and n_occ_ab_particle are declared but never used.
+ implicit none
+ integer,intent(in) :: h_fock,p_fock,ispin_fock
+ integer(bit_kind), intent(in) :: key_i(N_int,2)
+ double precision, intent(out) :: hthree
+ integer :: nexc(2),i,ispin,na,nb
+ integer(bit_kind) :: hole(N_int,2)
+ integer(bit_kind) :: particle(N_int,2)
+ integer :: occ_hole(N_int*bit_kind_size,2)
+ integer :: occ_particle(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_hole(2),n_occ_ab_particle(2)
+ integer(bit_kind)              :: det_tmp(N_int,2)
+
+
+  nexc(1) = 0
+  nexc(2) = 0
+  !! Get all the holes and particles of key_i with respect to the ROHF determinant
+  !! hole     = orbitals set in ref_bitmask but not in key_i
+  !! particle = orbitals set in key_i but not in ref_bitmask
+  !! nexc(s)  = number of holes of spin s
+  do i=1,N_int
+    hole(i,1)     = xor(key_i(i,1),ref_bitmask(i,1))
+    hole(i,2)     = xor(key_i(i,2),ref_bitmask(i,2))
+    particle(i,1) = iand(hole(i,1),key_i(i,1))
+    particle(i,2) = iand(hole(i,2),key_i(i,2))
+    hole(i,1)     = iand(hole(i,1),ref_bitmask(i,1))
+    hole(i,2)     = iand(hole(i,2),ref_bitmask(i,2))
+    nexc(1)       = nexc(1) + popcnt(hole(i,1))
+    nexc(2)       = nexc(2) + popcnt(hole(i,2))
+  enddo
+  integer                        :: tmp(2)
+  ! Lists of orbital indices of the particles, then of the holes.
+  ! The loop below runs up to nexc(ispin) on both lists, i.e. it relies on having
+  ! as many particles as holes for each spin (checked by the ASSERTs).
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(particle, occ_particle, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of particles alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of particle beta 
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(hole, occ_hole, tmp, N_int)
+  ASSERT (tmp(1) == nexc(1)) ! Number of holes alpha
+  ASSERT (tmp(2) == nexc(2)) ! Number of holes beta 
+
+  !! Initialize the matrix element with the reference ROHF Slater determinant Fock element
+  if(ispin_fock==1)then
+   hthree = fock_a_tot_3e_bi_orth(p_fock,h_fock) 
+  else 
+   hthree = fock_b_tot_3e_bi_orth(p_fock,h_fock) 
+  endif
+  ! det_tmp is modified in place by the two operators called below
+  det_tmp = ref_bitmask
+  do ispin=1,2
+    ! na : number of electrons of spin ispin, nb : number of electrons of the other spin
+    na = elec_num_tab(ispin)
+    nb = elec_num_tab(iand(ispin,1)+1)
+    do i=1,nexc(ispin)
+      ! add the i-th particle (na is incremented) ...
+      !DIR$ FORCEINLINE
+      call fock_ac_tc_operator( occ_particle(i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+      ! ... then remove the i-th hole (na is decremented)
+      !DIR$ FORCEINLINE
+      call fock_a_tc_operator ( occ_hole    (i,ispin), ispin, det_tmp, h_fock,p_fock, ispin_fock, hthree, N_int,na,nb)
+    enddo
+  enddo
+end
+
+subroutine fock_ac_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Update of the three-electron part of the Fock-like element
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  !
+  ! when an electron of spin ispin is ADDED in orbital iorb to the determinant 'key'.
+  !
+  ! The electrons taken into account are those of 'key' BEFORE the addition :
+  !
+  ! na electrons of spin ispin and nb electrons of the other spin.
+  !
+  ! In output, 'key' contains the additional electron, na has been increased by one,
+  !
+  ! nb is not modified and hthree has been INCREMENTED.
+  END_DOC
+  integer, intent(in)              :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)           :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout)  :: hthree
+
+  integer :: occ(Nint*bit_kind_size,2)
+  integer :: ne_tmp(2)
+  integer :: spin_opp
+  integer :: i_word, i_bit, j_el, j_orb
+
+  ! iorb must be a valid orbital index
+  if ( (iorb < 1).or.(iorb > mo_num) ) then
+    if (iorb < 1) then
+      print *,  irp_here, ': iorb < 1'
+    else
+      print *,  irp_here, ': iorb > mo_num'
+    endif
+    print *,  iorb, mo_num
+    stop -1
+  endif
+
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! Occupation lists of the determinant before the addition of the electron
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, ne_tmp, Nint)
+  ASSERT (ne_tmp(1) == elec_alpha_num)
+  ASSERT (ne_tmp(2) == elec_beta_num)
+
+  ! Position of the bit of iorb : word index, then offset inside the word
+  i_word = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (i_word >0)
+  i_bit = iorb - shiftl(i_word-1,bit_kind_shift)-1
+  ASSERT (i_bit >= 0)
+  key(i_word,ispin) = ibset(key(i_word,ispin),i_bit)
+  spin_opp = iand(ispin,1)+1
+
+  ! In what follows :: j_orb          == another electron of the determinant
+  !                 :: iorb           == electron of spin ispin that has been added
+  !                 :: p_fock, h_fock == particle/hole of spin ispin_fock
+  if(ispin == ispin_fock)then
+   ! j_orb, iorb and (h_fock,p_fock) all have the same spin >> pure parallel spin
+   do j_el = 1, na
+    j_orb = occ(j_el,ispin)
+    hthree += three_e_single_parrallel_spin_prov(j_orb,iorb,p_fock,h_fock)
+   enddo
+   ! j_orb has the spin opposite to the one of iorb and (h_fock,p_fock)
+   ! >> exchange between iorb and (h_fock,p_fock), 4-index tensors
+   do j_el = 1, nb
+    j_orb = occ(j_el,spin_opp)
+    hthree += three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch12_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+  else
+   ! ispin differs from ispin_fock
+   ! j_orb has the spin of iorb >> exchange between j_orb and iorb, 4-index tensors
+   do j_el = 1, na
+    j_orb = occ(j_el,ispin)
+    hthree += three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch23_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+   ! j_orb has the spin of (h_fock,p_fock) >> exchange between j_orb and (h_fock,p_fock)
+   do j_el = 1, nb
+    j_orb = occ(j_el,spin_opp)
+    hthree += three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch13_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+  endif
+
+  na = na+1
+end
+
+subroutine fock_a_tc_operator(iorb,ispin,key, h_fock,p_fock, ispin_fock,hthree,Nint,na,nb)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Update of the three-electron part of the Fock-like element
+  !
+  ! a^dagger_{p_fock} a_{h_fock} of spin ispin_fock
+  !
+  ! when an electron of spin ispin is REMOVED from orbital iorb of the determinant 'key'.
+  !
+  ! The electrons taken into account are those of 'key' AFTER the removal :
+  !
+  ! na-1 electrons of spin ispin and nb electrons of the other spin.
+  !
+  ! In output, 'key' does not contain the electron anymore, na has been decreased by one,
+  !
+  ! nb is not modified and hthree has been DECREMENTED.
+  END_DOC
+  integer, intent(in)              :: iorb, ispin, Nint, h_fock,p_fock, ispin_fock
+  integer, intent(inout)           :: na, nb
+  integer(bit_kind), intent(inout) :: key(Nint,2)
+  double precision, intent(inout)  :: hthree
+
+  integer :: occ(Nint*bit_kind_size,2)
+  integer :: ne_tmp(2)
+  integer :: spin_opp
+  integer :: i_word, i_bit, j_el, j_orb
+
+  ASSERT (iorb > 0)
+  ASSERT (ispin > 0)
+  ASSERT (ispin < 3)
+  ASSERT (Nint > 0)
+
+  ! Position of the bit of iorb : word index, then offset inside the word
+  i_word = shiftr(iorb-1,bit_kind_shift)+1
+  ASSERT (i_word>0)
+  i_bit = iorb - shiftl(i_word-1,bit_kind_shift)-1
+  key(i_word,ispin) = ibclr(key(i_word,ispin),i_bit)
+  spin_opp = iand(ispin,1)+1
+  na = na-1
+
+  ! Occupation lists of the determinant once the electron has been removed
+  !DIR$ FORCEINLINE
+  call bitstring_to_list_ab(key, occ, ne_tmp, Nint)
+
+  ! In what follows :: j_orb          == another electron of the determinant
+  !                 :: iorb           == electron of spin ispin that has been removed
+  !                 :: p_fock, h_fock == particle/hole of spin ispin_fock
+  if(ispin == ispin_fock)then
+   ! j_orb, iorb and (h_fock,p_fock) all have the same spin >> pure parallel spin
+   do j_el = 1, na
+    j_orb = occ(j_el,ispin)
+    hthree -= three_e_single_parrallel_spin_prov(j_orb,iorb,p_fock,h_fock)
+   enddo
+   ! j_orb has the spin opposite to the one of iorb and (h_fock,p_fock)
+   ! >> exchange between iorb and (h_fock,p_fock), 4-index tensors
+   do j_el = 1, nb
+    j_orb = occ(j_el,spin_opp)
+    hthree -= three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch12_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+  else
+   ! ispin differs from ispin_fock
+   ! j_orb has the spin of iorb >> exchange between j_orb and iorb, 4-index tensors
+   do j_el = 1, na
+    j_orb = occ(j_el,ispin)
+    hthree -= three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch23_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+   ! j_orb has the spin of (h_fock,p_fock) >> exchange between j_orb and (h_fock,p_fock)
+   do j_el = 1, nb
+    j_orb = occ(j_el,spin_opp)
+    hthree -= three_e_4_idx_direct_bi_ort(j_orb,iorb,p_fock,h_fock) - three_e_4_idx_exch13_bi_ort(j_orb,iorb,p_fock,h_fock)
+   enddo
+  endif
+
+end
+
+
+BEGIN_PROVIDER [double precision, fock_op_2_e_tc_closed_shell, (mo_num, mo_num) ]
+ implicit none
+ BEGIN_DOC
+! Closed-shell part of the Fock operator for the TC operator
+!
+! fock_op_2_e_tc_closed_shell(p,h) = \sum_k [ 2 tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h) ]
+!
+! where k runs over the alpha orbitals occupied in ref_closed_shell_bitmask.
+!
+! (p,h) run over the orbitals occupied in ref_closed_shell_bitmask ("docc") and over the orbitals
+!
+! in which full_ijkl_bitmask and ref_closed_shell_bitmask differ ("virt").
+!
+! All the other entries keep the value -1000.d0
+ END_DOC
+ integer :: h0,p0,h,p,k0,k,i
+ integer :: n_occ_ab(2)
+ integer :: occ(N_int*bit_kind_size,2)
+ integer :: n_occ_ab_virt(2)
+ integer :: occ_virt(N_int*bit_kind_size,2)
+ integer(bit_kind) :: key_virt(N_int,2)
+ integer :: n_ph
+ integer :: list_ph(2*N_int*bit_kind_size)
+ double precision :: accu
+
+ fock_op_2_e_tc_closed_shell = -1000.d0
+ call bitstring_to_list_ab(ref_closed_shell_bitmask, occ, n_occ_ab, N_int)
+ do i = 1, N_int
+  key_virt(i,1) = full_ijkl_bitmask(i)
+  key_virt(i,2) = full_ijkl_bitmask(i)
+  key_virt(i,1) = xor(key_virt(i,1),ref_closed_shell_bitmask(i,1))
+  key_virt(i,2) = xor(key_virt(i,2),ref_closed_shell_bitmask(i,2))
+ enddo
+ call bitstring_to_list_ab(key_virt, occ_virt, n_occ_ab_virt, N_int)
+
+ ! The docc->virt, virt->docc, virt->virt and docc->docc elements all obey the same
+ ! formula : the doubly occupied and the virtual orbitals are therefore gathered in a
+ ! single list and every (p,h) pair is computed exactly once.
+ n_ph = 0
+ do k0 = 1, n_occ_ab(1)
+  n_ph += 1
+  list_ph(n_ph) = occ(k0,1)
+ enddo
+ do k0 = 1, n_occ_ab_virt(1)
+  n_ph += 1
+  list_ph(n_ph) = occ_virt(k0,1)
+ enddo
+
+ do h0 = 1, n_ph
+  h = list_ph(h0)
+  do p0 = 1, n_ph
+   p = list_ph(p0)
+   accu = 0.d0
+   do k0 = 1, n_occ_ab(1)
+    k = occ(k0,1)
+    accu += 2.d0 * tc_2e_3idx_coulomb_integrals(k,p,h) - tc_2e_3idx_exchange_integrals(k,p,h)
+   enddo
+   fock_op_2_e_tc_closed_shell(p,h) = accu 
+  enddo
+ enddo
+
+! do i = 1, mo_num
+!  write(*,'(100(F10.5,X))')fock_op_2_e_tc_closed_shell(:,i)
+! enddo
+
+END_PROVIDER
+
+
+subroutine  single_htilde_mu_mat_fock_bi_ortho_no_3e(Nint, key_j, key_i, htot)
+  BEGIN_DOC
+  ! <key_j | H_tilde | key_i> for single excitation ONLY FOR ONE- AND TWO-BODY TERMS
+  !
+  ! htot is returned as 0.d0 when key_i and key_j are not connected by exactly one excitation.
+  !
+  ! The hole, particle, spin and phase of the excitation key_i -> key_j are obtained from
+  ! get_single_excitation / decode_exc and forwarded to get_single_excitation_from_fock_tc_no_3e.
+  !
+  ! WARNING : the operator is non Hermitian
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+  integer,           intent(in)  :: Nint
+  integer(bit_kind), intent(in)  :: key_j(Nint,2), key_i(Nint,2)
+  double precision,  intent(out) :: htot
+  double precision               :: hmono, htwoe, phase
+  integer                        :: degree, exc(0:2,2,2)
+  integer                        :: h1, p1, h2, p2, s1, s2
+
+  htot = 0.d0
+  call get_excitation_degree(key_i, key_j, degree, Nint)
+  if(degree.ne.1)then
+   return
+  endif
+
+  ! the occupation list of key_i was previously built here but never read: the call was dropped
+  call get_single_excitation(key_i, key_j, exc, phase, Nint)
+  call decode_exc(exc,1,h1,p1,h2,p2,s1,s2)
+  call get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h1,p1,s1,phase,hmono,htwoe,htot)
+end
+
+
+subroutine get_single_excitation_from_fock_tc_no_3e(key_i,key_j,h,p,spin,phase,hmono,htwoe,htot)
+ BEGIN_DOC
+ ! One- and two-body contributions for the single excitation h -> p of spin "spin" applied to key_i.
+ !
+ ! The two-body part starts from fock_op_2_e_tc_closed_shell(p,h) and is corrected for the
+ ! orbitals in which key_i differs from ref_closed_shell_bitmask:
+ !
+ ! * holes     (in the reference, absent from key_i) : - Coulomb (both spins) + exchange (spin "spin")
+ !
+ ! * particles (in key_i, absent from the reference) : + Coulomb (both spins) - exchange (spin "spin")
+ !
+ ! hmono = phase * mo_bi_ortho_tc_one_e(p,h), htot = htwoe + hmono.
+ !
+ ! key_j is not used by this routine.
+ END_DOC
+ use bitmasks
+ implicit none
+ integer,           intent(in)  :: h,p,spin
+ double precision,  intent(in)  :: phase
+ integer(bit_kind), intent(in)  :: key_i(N_int,2), key_j(N_int,2)
+ double precision,  intent(out) :: hmono,htwoe,htot
+ integer(bit_kind) :: diff_ref(N_int,2)
+ integer(bit_kind) :: hole_mask(N_int,2), part_mask(N_int,2)
+ integer           :: list_hole(N_int*bit_kind_size,2), list_part(N_int*bit_kind_size,2)
+ integer           :: n_hole(2), n_part(2)
+ integer           :: idx, ispin, iint
+ double precision  :: coul(mo_num), exch(mo_num)
+
+ ! local copies of the (:,p,h) columns of the three-index integrals
+ coul(1:mo_num) = tc_2e_3idx_coulomb_integrals(1:mo_num,p,h)
+ exch(1:mo_num) = tc_2e_3idx_exchange_integrals(1:mo_num,p,h)
+
+ ! bit masks of the holes / particles of key_i with respect to the closed-shell reference
+ do ispin = 1, 2
+  do iint = 1, N_int
+   diff_ref(iint,ispin)  = xor(key_i(iint,ispin),ref_closed_shell_bitmask(iint,ispin))
+   hole_mask(iint,ispin) = iand(diff_ref(iint,ispin),ref_closed_shell_bitmask(iint,ispin))
+   part_mask(iint,ispin) = iand(diff_ref(iint,ispin),key_i(iint,ispin))
+  enddo
+ enddo
+ call bitstring_to_list_ab(hole_mask, list_hole, n_hole, N_int)
+ call bitstring_to_list_ab(part_mask, list_part, n_part, N_int)
+
+ hmono = mo_bi_ortho_tc_one_e(p,h)
+ htwoe = fock_op_2_e_tc_closed_shell(p,h)
+
+ ! holes :: direct terms (alpha then beta)
+ do ispin = 1, 2
+  do idx = 1, n_hole(ispin)
+   htwoe = htwoe - coul(list_hole(idx,ispin))
+  enddo
+ enddo
+ ! holes :: exchange terms, same spin as the excited electron
+ do idx = 1, n_hole(spin)
+  htwoe = htwoe + exch(list_hole(idx,spin))
+ enddo
+
+ ! particles :: direct terms (alpha then beta)
+ do ispin = 1, 2
+  do idx = 1, n_part(ispin)
+   htwoe = htwoe + coul(list_part(idx,ispin))
+  enddo
+ enddo
+ ! particles :: exchange terms, same spin as the excited electron
+ do idx = 1, n_part(spin)
+  htwoe = htwoe - exch(list_part(idx,spin))
+ enddo
+
+ htwoe = htwoe * phase
+ hmono = hmono * phase
+ htot  = htwoe + hmono
+
+end
+
--- a/src/tc_bi_ortho/symmetrized_3_e_int.irp.f
+++ b/src/tc_bi_ortho/symmetrized_3_e_int.irp.f
@ -0,0 +1,111 @@
+subroutine give_all_perm_for_three_e(n,l,k,m,j,i,idx_list,phase)
+ implicit none
+ BEGIN_DOC
+ ! returns the list of all permuted indices for the antisymmetrization of
+ !
+ ! (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji>   when all indices have the same spins
+ !
+ ! idx_list(:,i) == list of the 6 indices corresponding the permutation "i"
+ !
+ ! phase(i) == phase of the permutation "i" : +1 for i = 1,2,3 (identity and the two
+ ! cyclic permutations), -1 for i = 4,5,6 (the three exchanges)
+ !
+ ! there are in total 6 permutations with different indices; only the last three
+ ! indices (m,j,i) are permuted, the first three (n,l,k) are kept in place
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ integer, intent(out) :: idx_list(6,6)
+ double precision, intent(out) :: phase(6)
+
+ !!! CYCLIC PERMUTATIONS
+ phase(1:3) = 1.d0
+ !!! IDENTITY PERMUTATION
+ idx_list(:,1) = (/n,l,k,m,j,i/)
+ !!! FIRST CYCLIC PERMUTATION
+ idx_list(:,2) = (/n,l,k,j,i,m/)
+ !!! SECOND CYCLIC PERMUTATION
+ idx_list(:,3) = (/n,l,k,i,m,j/)
+
+ !!! NON CYCLIC PERMUTATIONS
+ ! the phases of permutations 4 to 6 are set here (previously 1:3 was overwritten
+ ! and 4:6 was left undefined)
+ phase(4:6) = -1.d0
+ !!! PARTICLE 1 is FIXED
+ idx_list(:,4) = (/n,l,k,j,m,i/)
+ !!! PARTICLE 2 is FIXED
+ idx_list(:,5) = (/n,l,k,i,j,m/)
+ !!! PARTICLE 3 is FIXED
+ idx_list(:,6) = (/n,l,k,m,i,j/)
+
+end
+
+double precision function sym_3_e_int_from_6_idx_tensor(n,l,k,m,j,i)
+ implicit none
+ BEGIN_DOC
+ ! Signed sum of the six permutations of the last three indices of three_body_ints_bi_ort
+ !
+ ! for a given (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji> when all indices have the same spins
+ !
+ ! The identity and the two cyclic permutations enter with a + sign, the three exchanges with a - sign.
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ double precision :: acc
+
+ acc = three_body_ints_bi_ort(n,l,k,m,j,i)        ! direct
+ acc = acc + three_body_ints_bi_ort(n,l,k,j,i,m)  ! 1st cyclic permutation
+ acc = acc + three_body_ints_bi_ort(n,l,k,i,m,j)  ! 2nd cyclic permutation
+ acc = acc - three_body_ints_bi_ort(n,l,k,j,m,i)  ! elec 1 is kept fixed
+ acc = acc - three_body_ints_bi_ort(n,l,k,i,j,m)  ! elec 2 is kept fixed
+ acc = acc - three_body_ints_bi_ort(n,l,k,m,i,j)  ! elec 3 is kept fixed
+
+ sym_3_e_int_from_6_idx_tensor = acc
+
+end
+
+double precision function direct_sym_3_e_int(n,l,k,m,j,i)
+ implicit none
+ BEGIN_DOC
+ ! Same signed sum of six permutations as sym_3_e_int_from_6_idx_tensor, but each integral
+ ! is obtained through a call to give_integrals_3_body_bi_ort
+ !
+ ! for a given (k^dagger l^dagger n^dagger m j i)  <nlk|L|mji> when all indices have the same spins
+ END_DOC
+ integer, intent(in)  :: n,l,k,m,j,i
+ integer          :: right_idx(3,6), iperm
+ double precision :: sgn(6), integral
+
+ ! permutations of the last three indices, in the order they are accumulated
+ right_idx(:,1) = (/ m, j, i /)   ! direct
+ right_idx(:,2) = (/ j, i, m /)   ! 1st cyclic permutation
+ right_idx(:,3) = (/ i, m, j /)   ! 2nd cyclic permutation
+ right_idx(:,4) = (/ j, m, i /)   ! elec 1 is kept fixed
+ right_idx(:,5) = (/ i, j, m /)   ! elec 2 is kept fixed
+ right_idx(:,6) = (/ m, i, j /)   ! elec 3 is kept fixed
+ sgn(1:3) =  1.d0
+ sgn(4:6) = -1.d0
+
+ direct_sym_3_e_int = 0.d0
+ do iperm = 1, 6
+  call give_integrals_3_body_bi_ort(n,l,k,right_idx(1,iperm),right_idx(2,iperm),right_idx(3,iperm),integral)
+  direct_sym_3_e_int += sgn(iperm) * integral
+ enddo
+
+end
+
+double precision function three_e_diag_parrallel_spin(m,j,i)
+ implicit none
+ BEGIN_DOC
+ ! Combination of the 3-index three-electron tensors at (m,j,i) :
+ ! direct + cycle_1 + cycle_2 - exch23 - exch13 - exch12
+ END_DOC
+ integer, intent(in) :: i,j,m
+ double precision :: direct, cyc1, cyc2, ex23, ex13, ex12
+
+ direct = three_e_3_idx_direct_bi_ort(m,j,i)
+ cyc1   = three_e_3_idx_cycle_1_bi_ort(m,j,i)
+ cyc2   = three_e_3_idx_cycle_2_bi_ort(m,j,i)
+ ex23   = three_e_3_idx_exch23_bi_ort(m,j,i)
+ ex13   = three_e_3_idx_exch13_bi_ort(m,j,i)
+ ex12   = three_e_3_idx_exch12_bi_ort(m,j,i)
+
+ three_e_diag_parrallel_spin = direct
+ three_e_diag_parrallel_spin += cyc1 + cyc2 - ex23 - ex13 - ex12
+end
+
+double precision function three_e_single_parrallel_spin(m,j,k,i)
+ implicit none
+ BEGIN_DOC
+ ! Combination of the 4-index three-electron tensors at (m,j,k,i) :
+ ! direct + cycle_1 + cycle_2 - exch23 - exch13 - exch12
+ END_DOC
+ integer, intent(in) :: i,k,j,m
+ double precision :: direct, cyc1, cyc2, ex23, ex13, ex12
+
+ direct = three_e_4_idx_direct_bi_ort(m,j,k,i)
+ cyc1   = three_e_4_idx_cycle_1_bi_ort(m,j,k,i)
+ cyc2   = three_e_4_idx_cycle_2_bi_ort(m,j,k,i)
+ ex23   = three_e_4_idx_exch23_bi_ort(m,j,k,i)
+ ex13   = three_e_4_idx_exch13_bi_ort(m,j,k,i)
+ ex12   = three_e_4_idx_exch12_bi_ort(m,j,k,i)
+
+ three_e_single_parrallel_spin = direct
+ three_e_single_parrallel_spin += cyc1 + cyc2 - ex23 - ex13 - ex12
+end
+
+double precision function three_e_double_parrallel_spin(m,l,j,k,i)
+ implicit none
+ BEGIN_DOC
+ ! Combination of the 5-index three-electron tensors at (m,l,j,k,i) :
+ ! direct + cycle_1 + cycle_2 - exch23 - exch13 - exch12
+ END_DOC
+ integer, intent(in) :: i,k,j,m,l
+ double precision :: direct, cyc1, cyc2, ex23, ex13, ex12
+
+ direct = three_e_5_idx_direct_bi_ort(m,l,j,k,i)
+ cyc1   = three_e_5_idx_cycle_1_bi_ort(m,l,j,k,i)
+ cyc2   = three_e_5_idx_cycle_2_bi_ort(m,l,j,k,i)
+ ex23   = three_e_5_idx_exch23_bi_ort(m,l,j,k,i)
+ ex13   = three_e_5_idx_exch13_bi_ort(m,l,j,k,i)
+ ex12   = three_e_5_idx_exch12_bi_ort(m,l,j,k,i)
+
+ three_e_double_parrallel_spin = direct
+ three_e_double_parrallel_spin += cyc1 + cyc2 - ex23 - ex13 - ex12
+end
--- a/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
+++ b/src/tc_bi_ortho/symmetrized_3_e_int_prov.irp.f
@ -0,0 +1,140 @@
+
+BEGIN_PROVIDER [ double precision, three_e_diag_parrallel_spin_prov, (mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator ON A BI ORTHONORMAL BASIS
+  !
+  ! three_e_diag_parrallel_spin_prov(m,j,i) = All combinations of the form <mji|-L|mji> for same spin matrix elements
+  !
+  ! notice the -1 sign: in this way three_e_diag_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  !
+  ! Only the elements with m >= j are evaluated through three_e_diag_parrallel_spin;
+  ! the elements with m < j are copied from the (j,m,i) ones.
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, m
+  double precision :: integral, wall1, wall0, three_e_diag_parrallel_spin
+
+  three_e_diag_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_diag_parrallel_spin_prov ...'
+
+  ! dummy evaluation outside of the parallel region, to provide all stuffs
+  integral = three_e_diag_parrallel_spin(1,1,1)
+  call wall_time(wall0)
+
+ !$OMP PARALLEL DO DEFAULT (NONE) SCHEDULE (dynamic) &
+ !$OMP PRIVATE (i,j,m,integral)                      &
+ !$OMP SHARED (mo_num,three_e_diag_parrallel_spin_prov)
+  do i = 1, mo_num
+    do j = 1, mo_num
+      do m = j, mo_num
+        integral = three_e_diag_parrallel_spin(m,j,i)
+        three_e_diag_parrallel_spin_prov(m,j,i) = integral
+        ! mirror element : both writes stay inside the slab "i" owned by this iteration
+        three_e_diag_parrallel_spin_prov(j,m,i) = integral
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_diag_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
+BEGIN_PROVIDER [ double precision, three_e_single_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF SINGLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_single_parrallel_spin_prov(m,j,k,i) = All combination of <mjk|-L|mji> for same spin matrix elements
+  !
+  ! notice the -1 sign: in this way three_e_single_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  !
+  ! Every element is the value returned by three_e_single_parrallel_spin(m,j,k,i).
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m
+  double precision :: integral, wall1, wall0, three_e_single_parrallel_spin
+
+  three_e_single_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_single_parrallel_spin_prov ...'
+
+  ! dummy evaluation outside of the parallel region, before the timer is started
+  integral = three_e_single_parrallel_spin(1,1,1,1)
+  call wall_time(wall0)
+
+ !$OMP PARALLEL DO DEFAULT (NONE) SCHEDULE (dynamic) &
+ !$OMP PRIVATE (i,j,k,m,integral)                    &
+ !$OMP SHARED (mo_num,three_e_single_parrallel_spin_prov)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do m = 1, mo_num
+          integral = three_e_single_parrallel_spin(m,j,k,i)
+          three_e_single_parrallel_spin_prov(m,j,k,i) = integral
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_single_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
+
+! ---
+
+BEGIN_PROVIDER [ double precision, three_e_double_parrallel_spin_prov, (mo_num, mo_num, mo_num, mo_num, mo_num)]
+
+  BEGIN_DOC
+  !
+  ! matrix element of the -L  three-body operator FOR THE DIRECT TERMS OF DOUBLE EXCITATIONS AND BI ORTHO MOs
+  !
+  ! three_e_double_parrallel_spin_prov(m,l,j,k,i) = <mlk|-L|mji> ::: notice that i is the RIGHT MO and k is the LEFT MO
+  !
+  ! notice the -1 sign: in this way three_e_double_parrallel_spin_prov can be directly used to compute Slater rules with a + sign
+  !
+  ! Every element is the value returned by three_e_double_parrallel_spin(m,l,j,k,i).
+  !
+  END_DOC
+
+  implicit none
+  integer          :: i, j, k, m, l
+  double precision :: integral, wall1, wall0, three_e_double_parrallel_spin
+
+  three_e_double_parrallel_spin_prov = 0.d0
+  print *, ' Providing the three_e_double_parrallel_spin_prov ...'
+  call wall_time(wall0)
+
+  ! dummy evaluation outside of the parallel region (included in the measured wall time)
+  integral = three_e_double_parrallel_spin(1,1,1,1,1)
+
+ !$OMP PARALLEL DO DEFAULT (NONE) SCHEDULE (dynamic) &
+ !$OMP PRIVATE (i,j,k,m,l,integral)                  &
+ !$OMP SHARED (mo_num,three_e_double_parrallel_spin_prov)
+  do i = 1, mo_num
+    do k = 1, mo_num
+      do j = 1, mo_num
+        do l = 1, mo_num
+          do m = 1, mo_num
+            integral = three_e_double_parrallel_spin(m,l,j,k,i)
+            three_e_double_parrallel_spin_prov(m,l,j,k,i) = integral
+          enddo
+        enddo
+      enddo
+    enddo
+  enddo
+ !$OMP END PARALLEL DO
+
+  call wall_time(wall1)
+  print *, ' wall time for three_e_double_parrallel_spin_prov', wall1 - wall0
+
+END_PROVIDER 
+
--- a/src/tc_bi_ortho/tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f
@ -0,0 +1,61 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Sets the Becke grid parameters and read_wf, then calls routine_diag, which prints
+! the TC energies and the left- and right-eigenvectors and saves the left-right
+! wave functions at the end.
+  END_DOC
+  print *, 'Hello world'
+  ! grid settings forced by this program : 30 radial and 50 angular points
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  ! read_wf is switched on before anything else is provided
+  read_wf = .True.
+  ! the touch statements tell IRPF90 that the entities above were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call routine_diag
+! call test
+end
+
+subroutine test
+ use bitmasks
+ implicit none
+ BEGIN_DOC
+ ! Debugging helper : prints hmono, htwoe and htot returned by double_htilde_mu_mat_bi_ortho
+ ! for the first two determinants stored in psi_det.
+ END_DOC
+ double precision :: hmono,htwoe,hthree,htot
+ integer          :: i,j
+
+ print*,'test'
+ ! alternative call kept for reference (general matrix element, includes hthree)
+! call htilde_mu_mat_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call double_htilde_mu_mat_bi_ortho(N_int,psi_det(1,1,1), psi_det(1,1,2), hmono, htwoe, htot)
+ print*,hmono, htwoe,  htot
+
+end
+
+subroutine routine_diag
+ implicit none
+ BEGIN_DOC
+ ! Prints the TC energies and the first left/right eigenvectors, copies the left/right
+ ! eigenvectors of all N_states into psi_l_coef_bi_ortho / psi_r_coef_bi_ortho and
+ ! calls save_tc_bi_ortho_wavefunction.
+ END_DOC
+ integer :: idet
+
+  provide overlap_bi_ortho
+
+ print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1)
+ print*,'e_tc_left_right         = ',e_tc_left_right
+ print*,'e_tilde_bi_orth_00      = ',e_tilde_bi_orth_00
+ print*,'e_pt2_tc_bi_orth        = ',e_pt2_tc_bi_orth
+ print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single
+ print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double
+ print*,'***'
+ print*,'e_corr_bi_orth          = ',e_corr_bi_orth
+ print*,'e_corr_bi_orth_proj     = ',e_corr_bi_orth_proj
+ print*,'e_corr_single_bi_orth   = ',e_corr_single_bi_orth
+ print*,'e_corr_double_bi_orth   = ',e_corr_double_bi_orth
+
+ ! one line per determinant : index, left coefficient, right coefficient (first state)
+ print*,'Left/right eigenvectors'
+ do idet = 1,N_det
+  write(*,'(I5,X,(100(F12.7,X)))')idet,leigvec_tc_bi_orth(idet,1),reigvec_tc_bi_orth(idet,1)
+ enddo
+
+ ! overwrite the stored coefficients with the eigenvectors before saving
+ psi_l_coef_bi_ortho(1:N_det,1:N_states) = leigvec_tc_bi_orth(1:N_det,1:N_states)
+ psi_r_coef_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth(1:N_det,1:N_states)
+ SOFT_TOUCH psi_l_coef_bi_ortho psi_r_coef_bi_ortho
+ call save_tc_bi_ortho_wavefunction
+end
+
--- a/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f
+++ b/src/tc_bi_ortho/tc_bi_ortho_prop.irp.f
@ -0,0 +1,24 @@
+program tc_bi_ortho_prop
+  implicit none
+  BEGIN_DOC
+! Sets the Becke grid parameters and read_wf, then calls test, which prints the
+! three components of tc_bi_ortho_dipole.
+  END_DOC
+  print *, 'Hello world'
+  ! grid settings forced by this program : 30 radial and 50 angular points
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! the touch statements tell IRPF90 that the entities above were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+!  call routine_diag
+ call test
+end
+
+subroutine test
+ implicit none
+ BEGIN_DOC
+ ! Prints tc_bi_ortho_dipole(i,1) for i = 1, 2, 3.
+ END_DOC
+ integer :: icomp
+
+ print*,'TC Dipole components'
+ do icomp = 1, 3
+  print*,tc_bi_ortho_dipole(icomp,1)
+ enddo
+end
--- a/src/tc_bi_ortho/tc_cisd_sc2.irp.f
+++ b/src/tc_bi_ortho/tc_cisd_sc2.irp.f
@ -0,0 +1,24 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Sets the Becke grid parameters and read_wf, then calls test, which provides
+! eigval_tc_cisd_sc2_bi_ortho.
+  END_DOC
+  print *, 'Hello world'
+  ! grid settings forced by this program : 30 radial and 50 angular points
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! the touch statements tell IRPF90 that the entities above were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  call test
+end
+
+subroutine test
+ implicit none
+ BEGIN_DOC
+ ! Triggers the build of eigval_tc_cisd_sc2_bi_ortho.
+ END_DOC
+
+ ! former manual call of the dressing routine, kept for reference
+! double precision, allocatable :: dressing_dets(:),e_corr_dets(:)
+! allocate(dressing_dets(N_det),e_corr_dets(N_det))
+! e_corr_dets = 0.d0
+! call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets)
+
+  PROVIDE eigval_tc_cisd_sc2_bi_ortho
+end
--- a/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
+++ b/src/tc_bi_ortho/tc_cisd_sc2_utils.irp.f
@ -0,0 +1,145 @@
+ BEGIN_PROVIDER [ double precision, reigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)]
+&BEGIN_PROVIDER [ double precision, leigvec_tc_cisd_sc2_bi_ortho, (N_det,N_states)]
+&BEGIN_PROVIDER [ double precision, eigval_tc_cisd_sc2_bi_ortho, (N_states)]
+ implicit none
+ BEGIN_DOC
+ ! Self-consistent dressing of the diagonal of the TC Hamiltonian (loop named E(SC)^2 in the output).
+ !
+ ! At each iteration the diagonal dressing is rebuilt by get_cisd_sc2_dressing from the
+ ! per-determinant quantities e_corr_dets(i) = c_i * <HF|H|i> / c_1 and a dressed non-symmetric
+ ! Davidson diagonalization is run, until the lowest eigenvalue changes by less than 1.d-5.
+ !
+ ! eigval_tc_cisd_sc2_bi_ortho  : eigenvalues returned by the last Davidson call
+ !
+ ! reigvec_tc_cisd_sc2_bi_ortho : right eigenvector; only the first state is updated by the loop
+ !
+ ! leigvec_tc_cisd_sc2_bi_ortho : NOT computed here, set to zero
+ !
+ ! The first determinant of psi_det is used as the reference coefficient c_1.
+ END_DOC
+ integer :: it,degree,i,istate
+ double precision :: e_before, e_current,thr, hmono,htwoe,hthree
+ double precision, allocatable :: e_corr_dets(:),h0j(:), dressing_dets(:)
+ double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:)
+ double precision, allocatable :: H_jj(:),vec_tmp(:,:),eigval_tmp(:)
+ external                         htc_bi_ortho_calc_tdav
+ logical                       :: converged
+
+ ! only the first N_states columns are ever used : no N_det x N_det work array is needed
+ allocate(reigvec_tc_bi_orth_tmp(N_det,N_states))
+ allocate(e_corr_dets(N_det),h0j(N_det),dressing_dets(N_det))
+ allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag),eigval_tmp(N_states))
+
+ dressing_dets = 0.d0
+ ! h0j is only computed for single and double excitations : the other entries must be zero
+ h0j = 0.d0
+ do i = 1, N_det
+  call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+  call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+  if(degree == 1 .or. degree == 2)then
+   call htilde_mu_mat_bi_ortho(HF_bitmask,psi_det(1,1,i),N_int,hmono,htwoe,hthree,h0j(i))
+  endif
+ enddo
+
+ ! starting guess : stored right coefficients for the first state
+ reigvec_tc_bi_orth_tmp = 0.d0
+ do i = 1, N_det
+  reigvec_tc_bi_orth_tmp(i,1) = psi_r_coef_bi_ortho(i,1)
+ enddo
+ vec_tmp = 0.d0
+ do istate = 1, N_states
+  vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate)
+ enddo
+ ! unit vectors as guesses for the extra Davidson states
+ do istate = N_states+1, n_states_diag
+  vec_tmp(istate,istate) = 1.d0
+ enddo
+ print*,'Diagonalizing the TC CISD '
+ call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+ ! NOTE(review): the first e_corr_dets uses the starting coefficients, not the Davidson vector - confirm this is intended
+ do i = 1, N_det
+  e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1)
+ enddo
+ E_before = eigval_tmp(1)
+ print*,'Starting from ',E_before
+
+ e_current = 10.d0
+ thr = 1.d-5
+ it = 0
+ dressing_dets = 0.d0
+ do while (dabs(E_before-E_current).gt.thr)
+  it += 1
+  E_before = E_current
+  call get_cisd_sc2_dressing(psi_det,e_corr_dets,N_det,dressing_dets)
+  print*,'********************'
+  print*,'iteration       ',it
+  vec_tmp = 0.d0
+  do istate = 1, N_states
+   vec_tmp(:,istate) = reigvec_tc_bi_orth_tmp(:,istate)
+  enddo
+  do istate = N_states+1, n_states_diag
+   vec_tmp(istate,istate) = 1.d0
+  enddo
+  call davidson_general_diag_dressed_ext_rout_nonsym_b1space(vec_tmp, H_jj, dressing_dets,eigval_tmp, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+  print*,'outside Davidson'
+  print*,'eigval_tmp(1) = ',eigval_tmp(1)
+  do i = 1, N_det
+   reigvec_tc_bi_orth_tmp(i,1) = vec_tmp(i,1)
+   e_corr_dets(i) = reigvec_tc_bi_orth_tmp(i,1) * h0j(i)/reigvec_tc_bi_orth_tmp(1,1)
+  enddo
+  E_current = eigval_tmp(1)
+  print*,'it, E(SC)^2 = ',it,E_current
+ enddo
+
+ ! the eigenvalues come from the Davidson output (the full-diagonalization array was never filled)
+ eigval_tc_cisd_sc2_bi_ortho(1:N_states) = eigval_tmp(1:N_states)
+ reigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = reigvec_tc_bi_orth_tmp(1:N_det,1:N_states)
+ ! TODO : the left eigenvectors are not computed (only the right Davidson is run);
+ ! they are set to zero instead of being copied from an unassigned work array
+ leigvec_tc_cisd_sc2_bi_ortho(1:N_det,1:N_states) = 0.d0
+
+ deallocate(reigvec_tc_bi_orth_tmp,e_corr_dets,h0j,dressing_dets)
+ deallocate(H_jj,vec_tmp,eigval_tmp)
+
+END_PROVIDER 
+
+subroutine get_cisd_sc2_dressing(dets,e_corr_dets,ndet,dressing_dets)
+  use bitmasks
+ implicit none
+ BEGIN_DOC
+ ! Diagonal dressing used by the self-consistent TC CISD loop.
+ !
+ ! dets(N_int,2,ndet)  : determinants; the first one is skipped (loops start at 2)
+ !
+ ! e_corr_dets(ndet)   : per-determinant quantity that is redistributed
+ !
+ ! dressing_dets(ndet) : for each pair (i,j), i<j, that is not a pair of single excitations and
+ ! whose difference masks with respect to HF_bitmask share no orbital,
+ ! dressing_dets(i) receives e_corr_dets(j) and dressing_dets(j) receives e_corr_dets(i)
+ !
+ ! dressing_dets is returned as zero when there are fewer than 3 electrons.
+ END_DOC
+ integer, intent(in) :: ndet
+ integer(bit_kind), intent(in)  :: dets(N_int,2,ndet)
+ double precision, intent(in)   :: e_corr_dets(ndet)
+ double precision, intent(out) :: dressing_dets(ndet)
+ integer, allocatable  :: degree(:)
+ integer(bit_kind), allocatable :: hole_part(:,:,:)
+ integer :: i,j,k,same
+
+ dressing_dets = 0.d0
+ if(elec_alpha_num+elec_beta_num<3)return
+
+ ! excitation degree and mask of the orbitals that differ from HF_bitmask, for each determinant.
+ ! The hole/particle/spin decoding that used to be done here was never read (and passed
+ ! the whole degree array to decode_exc), so it was removed.
+ allocate(degree(ndet),hole_part(N_int,2,ndet))
+ do i = 2, ndet
+  call get_excitation_degree(HF_bitmask,dets(1,1,i),degree(i),N_int)
+  do j = 1, N_int
+   hole_part(j,1,i) = xor( HF_bitmask(j,1), dets(j,1,i))
+   hole_part(j,2,i) = xor( HF_bitmask(j,2), dets(j,2,i))
+  enddo
+ enddo
+
+ do i = 2, ndet
+  do j = i+1, ndet
+   ! pairs of single excitations are skipped
+   if(degree(i) == degree(j) .and. degree(i)==1)cycle
+   ! number of orbitals touched by both excitations
+   same = 0
+   do k = 1, N_int
+    same = same + popcnt(iand(hole_part(k,1,i),hole_part(k,1,j)))
+    same = same + popcnt(iand(hole_part(k,2,i),hole_part(k,2,j)))
+   enddo
+   if(same.eq.0)then
+    dressing_dets(i) += e_corr_dets(j)
+    dressing_dets(j) += e_corr_dets(i)
+   endif
+  enddo
+ enddo
+
+ deallocate(degree,hole_part)
+
+end
--- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f
+++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
@ -0,0 +1,183 @@
+  use bitmasks
+
+ BEGIN_PROVIDER [ integer, index_HF_psi_det]
+ implicit none
+ BEGIN_DOC
+ ! Index in psi_det of the first determinant with excitation degree 0 with respect to HF_bitmask.
+ !
+ ! If no such determinant is found, a warning is printed and the index falls back to 1
+ ! (it was previously left undefined in that case).
+ END_DOC
+ integer :: i,degree
+ logical :: found
+
+ found = .False.
+ index_HF_psi_det = 1
+ do i = 1, N_det
+   call get_excitation_degree(HF_bitmask,psi_det(1,1,i),degree,N_int)
+   if(degree == 0)then
+    index_HF_psi_det = i
+    found = .True.
+    exit
+   endif
+ enddo
+ if(.not.found)then
+   print*,'Warning : HF_bitmask not found in psi_det, index_HF_psi_det set to 1'
+ endif
+ END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [double precision, eigval_right_tc_bi_orth, (N_states)]
+&BEGIN_PROVIDER [double precision, eigval_left_tc_bi_orth, (N_states)]
+&BEGIN_PROVIDER [double precision, reigvec_tc_bi_orth, (N_det,N_states)]
+&BEGIN_PROVIDER [double precision, leigvec_tc_bi_orth, (N_det,N_states)]
+&BEGIN_PROVIDER [double precision, norm_ground_left_right_bi_orth ]
+
+  BEGIN_DOC
+  ! eigenvalues, right and left eigenvectors of the transcorrelated Hamiltonian on the BI-ORTHO basis 
+  !
+  ! If N_det <= N_det_max_full, htilde_matrix_elmt_bi_ortho is diagonalized with non_hrmt_real_diag;
+  ! otherwise two non-symmetric Davidson runs are done (H_tc_dagger_u_0_opt for the left vectors,
+  ! H_tc_u_0_opt for the right vectors), starting from psi_l_coef_bi_ortho / psi_r_coef_bi_ortho.
+  !
+  ! The vectors are then passed to bi_normalize.
+  !
+  ! norm_ground_left_right_bi_orth : scalar product of the first left and right eigenvectors
+  END_DOC
+
+  implicit none
+  integer                       :: i, idx_dress, j, istate
+  logical                       :: converged, dagger
+  integer                       :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l
+  double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:)
+
+  PROVIDE N_det N_int
+
+   if(n_det.le.N_det_max_full)then
+    ! ---- full diagonalization of the N_det x N_det matrix
+    allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det))
+    call non_hrmt_real_diag(N_det,htilde_matrix_elmt_bi_ortho,& 
+         leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& 
+         n_real_tc_bi_orth_eigval_right,eigval_right_tmp)
+    double precision, allocatable :: coef_hf_r(:),coef_hf_l(:)
+    integer, allocatable :: iorder(:)
+    allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det))
+    ! Look for the RIGHT eigenvector with the largest |coefficient| on the HF determinant.
+    ! The keys are negated : presumably dsort sorts in increasing order, so that iorder(1)
+    ! is the eigenvector with the largest weight - TODO confirm
+    do i = 1,N_det
+     iorder(i) = i
+     coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
+    enddo
+    call dsort(coef_hf_r,iorder,N_det)
+    igood_r = iorder(1)
+    print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1)
+    ! same search for the LEFT eigenvectors
+    do i = 1,N_det
+     iorder(i) = i
+     coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
+    enddo
+    call dsort(coef_hf_l,iorder,N_det)
+    igood_l = iorder(1)
+    print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1)
+
+    ! NOTE(review): both warnings are printed only when the two indices differ AND igood_r is not 1
+    if(igood_r.ne.igood_l.and.igood_r.ne.1)then
+     print *,''
+     print *,'Warning, the left and right eigenvectors are "not the same" '
+     print *,'Warning, the ground state is not dominated by HF...'
+     print *,'State with largest RIGHT coefficient of HF ',igood_r
+     print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r)
+     print *,'State with largest LEFT  coefficient of HF ',igood_l
+     print *,'coef of HF in LEFT  eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l)
+    endif
+    if(state_following_tc)then
+     ! state following : only state 1 of the provided arrays is filled in this branch
+     ! NOTE(review): entries for states 2..N_states are not assigned here - confirm N_states == 1 is assumed
+     print *,'Following the states with the largest coef on HF'
+     print *,'igood_r,igood_l',igood_r,igood_l
+     i= igood_r
+     eigval_right_tc_bi_orth(1) = eigval_right_tmp(i)
+     do j = 1, N_det
+       reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i)
+!       print*,reigvec_tc_bi_orth(j,1)
+     enddo
+     i= igood_l
+     ! the left eigenvalue is read from the same eigenvalue array as the right one
+     eigval_left_tc_bi_orth(1)  = eigval_right_tmp(i)
+     do j = 1, N_det
+       leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i)
+     enddo
+    else 
+     ! no state following : keep the first N_states eigenpairs in the order returned by non_hrmt_real_diag
+     do i = 1, N_states
+       eigval_right_tc_bi_orth(i) = eigval_right_tmp(i)
+       eigval_left_tc_bi_orth(i)  = eigval_right_tmp(i)
+       do j = 1, N_det
+         reigvec_tc_bi_orth(j,i) = reigvec_tc_bi_orth_tmp(j,i)
+         leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i)
+       enddo
+     enddo
+    endif
+   else 
+    ! ---- iterative (Davidson) diagonalization
+    double precision, allocatable :: H_jj(:),vec_tmp(:,:)
+    external                         htc_bi_ortho_calc_tdav
+    external                         htcdag_bi_ortho_calc_tdav
+    external                         H_tc_u_0_opt
+    external                         H_tc_dagger_u_0_opt
+    allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag))
+    ! diagonal matrix elements, passed to both Davidson calls
+    do i = 1, N_det
+      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
+    enddo
+ !!!! Preparing the left-eigenvector
+    print*,'Computing the left-eigenvector '
+    ! guess : stored left coefficients for the N_states first vectors, unit vectors for the extra ones
+    vec_tmp = 0.d0
+    do istate = 1, N_states
+     vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate)
+    enddo
+    do istate = N_states+1, n_states_diag
+     vec_tmp(istate,istate) = 1.d0
+    enddo
+!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav)
+    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt)
+    do istate = 1, N_states
+     leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+    enddo
+
+    print*,'Computing the right-eigenvector '
+ !!!! Preparing the right-eigenvector
+    ! same guess construction, from the stored right coefficients
+    vec_tmp = 0.d0
+    do istate = 1, N_states
+     vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate)
+    enddo
+    do istate = N_states+1, n_states_diag
+     vec_tmp(istate,istate) = 1.d0
+    enddo
+!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt)
+    do istate = 1, N_states
+     reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+    enddo
+
+    ! NOTE(review): the value of "converged" is not checked after either Davidson call
+    deallocate(H_jj)
+   endif
+  ! rescale the left/right vectors (see bi_normalize), then report the first-state scalar product
+  call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states)
+   print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1)
+   norm_ground_left_right_bi_orth = 0.d0
+   do j = 1, N_det
+    norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)
+   enddo
+   print*,'norm l/r = ',norm_ground_left_right_bi_orth
+
+END_PROVIDER 
+
+
+
+subroutine bi_normalize(u_l,u_r,n,ld,nstates)
+  BEGIN_DOC
+  ! Normalization of the scalar product of the left/right eigenvectors
+  !
+  ! For each state i :
+  !
+  ! 1) u_r(:,i) is normalized to unit Euclidean norm and its sign is chosen such that u_r(1,i) >= 0
+  !
+  ! 2) with accu = <u_l|u_r>, both vectors are multiplied by 1/sqrt(|accu|); u_l is additionally
+  !    multiplied by the sign of u_l(1,i)*u_r(1,i)
+  !
+  ! u_l, u_r : left / right vectors, leading dimension ld, modified in place
+  !
+  ! n        : number of components used in each vector
+  !
+  ! nstates  : number of vectors
+  !
+  ! A zero first component is treated as positive (the former x/|x| form produced NaN).
+  ! NOTE(review): a vanishing norm or a vanishing <u_l|u_r> still leads to a division by zero.
+  END_DOC
+  implicit none
+  integer, intent(in) :: n,ld,nstates
+  double precision, intent(inout) :: u_l(ld,nstates), u_r(ld,nstates)
+  integer :: i, j
+  double precision  :: accu, tmp
+  do i = 1, nstates
+   !!!! Normalization of right eigenvectors |Phi>
+   accu = 0.d0
+   do j = 1, n
+    accu += u_r(j,i) * u_r(j,i)
+   enddo
+   accu = 1.d0/dsqrt(accu)
+   print*,'accu_r = ',accu
+   do j = 1, n
+    u_r(j,i) *= accu
+   enddo
+   ! sign convention : first component of the right vector positive
+   tmp = dsign(1.d0, u_r(1,i))
+   do j = 1, n
+    u_r(j,i) *= tmp
+   enddo
+   !!!! Adaptation of the norm of the left eigenvector such that <chi|Phi> = 1
+   accu = 0.d0
+   do j = 1, n
+    accu += u_l(j,i) * u_r(j,i)
+   enddo
+   if(accu.gt.0.d0)then
+    accu = 1.d0/dsqrt(accu)
+   else
+    accu = 1.d0/dsqrt(-accu)
+   endif
+   ! sign of the product of the first components of the left and right vectors
+   tmp = dsign(1.d0, u_l(1,i) * u_r(1,i))
+   do j = 1, n
+    u_l(j,i) *= accu * tmp
+    u_r(j,i) *= accu
+   enddo
+  enddo
+end
--- a/src/tc_bi_ortho/tc_hmat.irp.f
+++ b/src/tc_bi_ortho/tc_hmat.irp.f
@ -0,0 +1,45 @@
+
+ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho, (N_det,N_det)]
+
+  BEGIN_DOC
+  ! htilde_matrix_elmt_bi_ortho(j,i) = <J| H^tilde |I>
+  !
+  ! Each element is the total value returned by htilde_mu_mat_bi_ortho for psi_det(:,:,j) and psi_det(:,:,i).
+  !
+  ! WARNING !!!!!!!!! IT IS NOT HERMITIAN !!!!!!!!!
+  END_DOC
+
+  implicit none
+  integer          :: idet, jdet
+  double precision :: h_one, h_two, h_three, h_all
+
+  PROVIDE N_int
+
+ !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE)     &
+ !$OMP PRIVATE(idet,jdet,h_one,h_two,h_three,h_all)   &
+ !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho)
+    do idet = 1, N_det
+      do jdet = 1, N_det
+        ! < J | Htilde | I > : only the total is stored, the one/two/three-body parts are discarded
+        call htilde_mu_mat_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, h_one, h_two, h_three, h_all)
+        htilde_matrix_elmt_bi_ortho(jdet,idet) = h_all
+      enddo
+    enddo
+ !$OMP END PARALLEL DO
+
+END_PROVIDER 
+
+ BEGIN_PROVIDER [double precision, htilde_matrix_elmt_bi_ortho_tranp, (N_det,N_det)]
+ implicit none
+ BEGIN_DOC
+ ! Transpose of htilde_matrix_elmt_bi_ortho :
+ !
+ ! htilde_matrix_elmt_bi_ortho_tranp(j,i) = htilde_matrix_elmt_bi_ortho(i,j)
+ END_DOC
+ integer :: icol, irow
+  do icol = 1, N_det
+    do irow = 1, N_det
+      htilde_matrix_elmt_bi_ortho_tranp(irow,icol) = htilde_matrix_elmt_bi_ortho(icol,irow)
+    enddo
+  enddo
+END_PROVIDER 
--- a/src/tc_bi_ortho/tc_natorb.irp.f
+++ b/src/tc_bi_ortho/tc_natorb.irp.f
@ -0,0 +1,218 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_mo, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_eigval, (mo_num)]
+
+  BEGIN_DOC
+  !
+  ! natorb_tc_reigvec_mo : RIGHT eigenvectors of the ground state transition matrix (equivalent of natural orbitals)
+  ! natorb_tc_leigvec_mo : LEFT  eigenvectors of the ground state transition matrix (equivalent of natural orbitals)
+  ! natorb_tc_eigval     : eigenvalues of the ground state transition matrix (equivalent of the occupation numbers). WARNING :: can be negative !!
+  !
+  ! The matrix handed to diag_mat_per_fock_degen is MINUS tc_transition_matrix(:,:,1,1);
+  ! natorb_tc_eigval is stored as returned by that routine and is printed below with the sign flipped back.
+  !
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k
+  double precision              :: thr_d, thr_nd, thr_deg, accu
+  double precision              :: accu_d, accu_nd
+  double precision, allocatable :: dm_tmp(:,:), fock_diag(:)
+
+  allocate(dm_tmp(mo_num,mo_num), fock_diag(mo_num))
+
+  ! Work on the negated transition matrix of state 1
+  ! (presumably so that the largest occupations come first -- TODO confirm against diag_mat_per_fock_degen)
+  dm_tmp(:,:) = -tc_transition_matrix(:,:,1,1)
+
+  ! Print the (un-negated) transition matrix and collect the diagonal of the TC Fock matrix
+  print *, ' dm_tmp'
+  do i = 1, mo_num
+    fock_diag(i) = fock_matrix_tc_mo_tot(i,i)
+    write(*, '(100(F16.10,X))') -dm_tmp(:,i)
+  enddo
+
+  ! Thresholds forwarded to diag_mat_per_fock_degen
+  ! NOTE(review): their exact meaning is defined by that routine, which is not visible here
+  thr_d   = 1.d-6
+  thr_nd  = 1.d-6
+  thr_deg = 1.d-3
+  call diag_mat_per_fock_degen( fock_diag, dm_tmp, mo_num, thr_d, thr_nd, thr_deg & 
+                              , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo, natorb_tc_eigval)
+!   call non_hrmt_bieig( mo_num, dm_tmp&
+!                      , natorb_tc_leigvec_mo, natorb_tc_reigvec_mo& 
+!                      , mo_num, natorb_tc_eigval )
+
+  ! Print the eigenvalues with the sign restored, and their sum
+  accu = 0.d0
+  do i = 1, mo_num
+    print*,'natorb_tc_eigval(i) = ',-natorb_tc_eigval(i)
+    accu += -natorb_tc_eigval(i)
+  enddo
+  print *, ' accu = ', accu
+
+  ! Rescale each left/right pair by 1/sqrt(|<L_i|R_i>|), then build the overlap
+  ! dm_tmp(j,i) = sum_k R(k,i) * L(k,j).
+  ! Row i is built right after pair i is rescaled, so its elements with j > i still use un-rescaled left vectors.
+  dm_tmp = 0.d0
+  do i = 1, mo_num
+    accu = 0.d0
+    do k = 1, mo_num
+      accu += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,i)
+    enddo
+    ! dabs: the sign of <L_i|R_i> is not changed by this scaling
+    accu = 1.d0/dsqrt(dabs(accu))
+    natorb_tc_reigvec_mo(:,i) *= accu
+    natorb_tc_leigvec_mo(:,i) *= accu
+    do j = 1, mo_num
+      do k = 1, mo_num
+        dm_tmp(j,i) += natorb_tc_reigvec_mo(k,i) * natorb_tc_leigvec_mo(k,j)
+      enddo
+    enddo
+  enddo
+
+  ! Diagnostics: trace and L1 norm of the off-diagonal part of the overlap matrix built above
+  accu_d  = 0.d0
+  accu_nd = 0.d0
+  do i = 1, mo_num
+    accu_d += dm_tmp(i,i)
+    !write(*,'(100(F16.10,X))')dm_tmp(:,i)
+    do j = 1, mo_num
+      if(i==j)cycle
+      accu_nd += dabs(dm_tmp(j,i))
+    enddo
+  enddo
+  print *, ' Trace of the overlap between TC natural orbitals     ', accu_d
+  print *, ' L1 norm of extra diagonal elements of overlap matrix ', accu_nd
+
+  deallocate(dm_tmp, fock_diag)
+ 
+END_PROVIDER 
+
+! ---
+ 
+ BEGIN_PROVIDER [ double precision, fock_diag_sorted_r_natorb, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_diag_sorted_l_natorb, (mo_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, fock_diag_sorted_v_natorb, (mo_num)]
+
+  BEGIN_DOC
+  ! TC natural orbitals re-ordered according to the diagonal TC Fock element
+  ! sum_{k,j} natorb_tc_leigvec_mo(k,i) * Fock_matrix_tc_mo_tot(k,j) * natorb_tc_reigvec_mo(j,i)
+  ! of each orbital i, in the order returned by dsort.
+  !
+  ! fock_diag_sorted_r_natorb : re-ordered natorb_tc_reigvec_mo
+  ! fock_diag_sorted_l_natorb : re-ordered natorb_tc_leigvec_mo
+  ! fock_diag_sorted_v_natorb : re-ordered natorb_tc_eigval
+  END_DOC
+
+  implicit none
+  integer                       :: iorb, p, q, isrc
+  integer,          allocatable :: iorder(:)
+  double precision, allocatable :: fock_diag(:)
+
+  print *, ' Diagonal elements of the Fock matrix before '
+  do iorb = 1, mo_num
+    write(*,*) iorb, Fock_matrix_tc_mo_tot(iorb,iorb)
+  enddo
+
+  allocate(fock_diag(mo_num), iorder(mo_num))
+
+  ! Diagonal Fock element of each natural orbital, together with the identity permutation
+  do iorb = 1, mo_num
+    iorder(iorb)    = iorb
+    fock_diag(iorb) = 0.d0
+    do q = 1, mo_num
+      do p = 1, mo_num
+        fock_diag(iorb) += natorb_tc_leigvec_mo(p,iorb) * Fock_matrix_tc_mo_tot(p,q) * natorb_tc_reigvec_mo(q,iorb)
+      enddo
+    enddo
+  enddo
+
+  ! dsort sorts fock_diag and applies the same permutation to iorder
+  call dsort(fock_diag, iorder, mo_num)
+
+  print *, ' Diagonal elements of the Fock matrix after '
+  do iorb = 1, mo_num
+    write(*,*) iorb, fock_diag(iorb)
+  enddo
+
+  ! Gather eigenvalues and eigenvector columns in the sorted order
+  do iorb = 1, mo_num
+    isrc = iorder(iorb)
+    fock_diag_sorted_v_natorb(iorb)          = natorb_tc_eigval(isrc)
+    fock_diag_sorted_r_natorb(1:mo_num,iorb) = natorb_tc_reigvec_mo(1:mo_num,isrc)
+    fock_diag_sorted_l_natorb(1:mo_num,iorb) = natorb_tc_leigvec_mo(1:mo_num,isrc)
+  enddo
+
+  deallocate(fock_diag, iorder)
+
+END_PROVIDER
+ 
+! --- 
+ 
+ BEGIN_PROVIDER [ double precision, natorb_tc_reigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, natorb_tc_leigvec_ao, (ao_num, mo_num)]
+&BEGIN_PROVIDER [ double precision, overlap_natorb_tc_eigvec_ao, (mo_num, mo_num) ]
+ 
+  BEGIN_DOC
+  ! natorb_tc_reigvec_ao        : mo_r_coef x fock_diag_sorted_r_natorb (RIGHT TC natural orbitals on the AO basis)
+  ! natorb_tc_leigvec_ao        : mo_l_coef x fock_diag_sorted_l_natorb (LEFT  TC natural orbitals on the AO basis)
+  ! overlap_natorb_tc_eigvec_ao : L^T x ao_overlap x R, computed after renormalization
+  !
+  ! Each left/right pair i is rescaled by 1/sqrt(|S(i,i)|), S being the overlap before renormalization.
+  !
+  ! THE OVERLAP SHOULD BE THE SAME AS overlap_natorb_tc_eigvec_mo
+  END_DOC
+ 
+  implicit none
+  integer                       :: i, j
+  double precision              :: accu, accu_d, norm
+  double precision, allocatable :: tmp(:,:)
+ 
+  ! MO_R x R
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+            , mo_r_coef, size(mo_r_coef, 1)                   &
+            , fock_diag_sorted_r_natorb, size(fock_diag_sorted_r_natorb, 1) &
+            , 0.d0, natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) )
+ 
+  ! MO_L x L
+  call dgemm( 'N', 'N', ao_num, mo_num, mo_num, 1.d0          &
+            , mo_l_coef, size(mo_l_coef, 1)                   &
+            , fock_diag_sorted_l_natorb, size(fock_diag_sorted_l_natorb, 1) &
+            , 0.d0, natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1) )
+ 
+  allocate( tmp(mo_num,ao_num) )
+ 
+  ! --- overlap before renormalization
+ 
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+ 
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_natorb_tc_eigvec_ao, size(overlap_natorb_tc_eigvec_ao, 1) )
+ 
+  ! --- renormalization of each left/right pair
+  ! The first index of the AO-basis vectors runs over the ao_num AOs: the WHOLE column has to be
+  ! rescaled (stopping at mo_num leaves the rows mo_num+1..ao_num untouched when ao_num > mo_num).
+  do i = 1, mo_num
+    norm = 1.d0/dsqrt(dabs(overlap_natorb_tc_eigvec_ao(i,i)))
+    do j = 1, ao_num
+      natorb_tc_reigvec_ao(j,i) *= norm
+      natorb_tc_leigvec_ao(j,i) *= norm
+    enddo
+  enddo
+ 
+  ! --- overlap after renormalization (this is the value that is provided)
+ 
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                           &
+            , natorb_tc_leigvec_ao, size(natorb_tc_leigvec_ao, 1), ao_overlap, size(ao_overlap, 1) &
+            , 0.d0, tmp, size(tmp, 1) )
+ 
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                             &
+            , tmp, size(tmp, 1), natorb_tc_reigvec_ao, size(natorb_tc_reigvec_ao, 1) &
+            , 0.d0, overlap_natorb_tc_eigvec_ao, size(overlap_natorb_tc_eigvec_ao, 1) )
+ 
+  deallocate( tmp )
+ 
+  ! --- diagnostics: trace and L1 norm of the off-diagonal part
+  accu_d = 0.d0
+  accu = 0.d0
+  do i = 1, mo_num
+    accu_d += overlap_natorb_tc_eigvec_ao(i,i)
+    do j = 1, mo_num
+      if(i==j)cycle
+      accu += dabs(overlap_natorb_tc_eigvec_ao(j,i))
+    enddo
+  enddo
+  print*,'Trace of the overlap_natorb_tc_eigvec_ao           = ',accu_d
+  print*,'mo_num                                             = ',mo_num
+  print*,'L1 norm of extra diagonal elements of overlap matrix ',accu
+ 
+ END_PROVIDER
+
--- a/src/tc_bi_ortho/tc_prop.irp.f
+++ b/src/tc_bi_ortho/tc_prop.irp.f
@ -0,0 +1,80 @@
+
+BEGIN_PROVIDER [ double precision, tc_transition_matrix, (mo_num, mo_num,N_states,N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! tc_transition_matrix(p,h,istate,jstate) = <Chi_istate| a^\dagger_p a_h |Phi_jstate>
+ !
+ ! where <Chi_istate| and |Phi_jstate> are the left/right eigenvectors on a bi-ortho basis
+ !
+ ! Alpha and beta contributions are summed (spin-free matrix).
+ END_DOC
+ integer :: i,j,istate,jstate,m,p,h
+ double precision :: phase
+ integer, allocatable           :: occ(:,:)
+ integer                        :: n_occ_ab(2),degree,exc(0:2,2,2)
+ allocate(occ(N_int*bit_kind_size,2))
+ tc_transition_matrix = 0.d0
+ do istate = 1, N_states
+  do jstate = 1, N_states
+   do i = 1, N_det
+    do j = 1, N_det
+     call get_excitation_degree(psi_det(1,1,i),psi_det(1,1,j),degree,N_int)
+     if(degree.gt.1)then
+      ! a one-body operator cannot connect determinants differing by more than a single excitation
+      cycle
+     else if (degree == 0)then
+      ! identical determinants: each occupied spin-orbital contributes to the diagonal
+      call bitstring_to_list_ab(psi_det(1,1,i), occ, n_occ_ab, N_int)
+      do p = 1, n_occ_ab(1) ! browsing the alpha electrons
+       m = occ(p,1)
+       tc_transition_matrix(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+      enddo
+      do p = 1, n_occ_ab(2) ! browsing the beta electrons
+       ! the beta occupations are stored in the SECOND column of occ
+       m = occ(p,2)
+       tc_transition_matrix(m,m,istate,jstate)+= psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+      enddo
+     else
+      ! single excitation going from determinant j (right) to determinant i (left)
+      call get_single_excitation(psi_det(1,1,j),psi_det(1,1,i),exc,phase,N_int)
+      if (exc(0,1,1) == 1) then
+        ! Single alpha
+        h = exc(1,1,1) ! hole in psi_det(1,1,j) 
+        p = exc(1,2,1) ! particle in psi_det(1,1,j) 
+      else
+        ! Single beta
+        h = exc(1,1,2) ! hole in psi_det(1,1,j) 
+        p = exc(1,2,2) ! particle in psi_det(1,1,j) 
+      endif
+      tc_transition_matrix(p,h,istate,jstate)+= phase * psi_l_coef_bi_ortho(i,istate) * psi_r_coef_bi_ortho(j,jstate)
+     endif
+    enddo
+   enddo
+  enddo
+ enddo
+ deallocate(occ)
+ END_PROVIDER
+
+
+ BEGIN_PROVIDER [double precision, tc_bi_ortho_dipole, (3,N_states)]
+
+ BEGIN_DOC
+ ! tc_bi_ortho_dipole(m,istate) : m-th Cartesian component (1=x, 2=y, 3=z) of the dipole of state istate
+ !
+ ! = - sum_{j,i} tc_transition_matrix(j,i,istate,istate) * mo_bi_orth_bipole_{x,y,z}(j,i)
+ !   + sum_A nucl_charge(A) * nucl_coord(A,m)
+ END_DOC
+
+ implicit none
+ integer          :: istate, icomp, iorb, jorb, inucl
+ double precision :: nuclei_part(3), elec_part(3), t_ji
+
+ ! Nuclear contribution: identical for all the states
+ nuclei_part(1:3) = 0.d0
+ do icomp = 1, 3
+  do inucl = 1, nucl_num
+   nuclei_part(icomp) += nucl_charge(inucl) * nucl_coord(inucl,icomp)
+  enddo
+ enddo
+
+ do istate = 1, N_states
+
+  ! Electronic contribution of the state
+  elec_part(1:3) = 0.d0
+  do iorb = 1, mo_num
+   do jorb = 1, mo_num
+    t_ji = tc_transition_matrix(jorb,iorb,istate,istate)
+    elec_part(1) += -t_ji * mo_bi_orth_bipole_x(jorb,iorb)
+    elec_part(2) += -t_ji * mo_bi_orth_bipole_y(jorb,iorb)
+    elec_part(3) += -t_ji * mo_bi_orth_bipole_z(jorb,iorb)
+   enddo
+  enddo
+
+  do icomp = 1, 3
+   tc_bi_ortho_dipole(icomp,istate) = elec_part(icomp) + nuclei_part(icomp)
+  enddo
+
+ enddo
+
+ END_PROVIDER
+
--- a/src/tc_bi_ortho/tc_som.irp.f
+++ b/src/tc_bi_ortho/tc_som.irp.f
@ -0,0 +1,70 @@
+! ---
+
+program tc_som
+
+  BEGIN_DOC
+  ! Reads the wave function (read_wf = .true.) and prints the quantity U_SOM computed in subroutine main
+  ! from transcorrelated matrix elements between the HF determinant and the other determinants.
+  !
+  ! A TC-SCF calculation is expected to have been run before (see the message printed at start-up).
+  END_DOC
+
+  implicit none
+
+  print *, ' starting ...'
+  print *, ' do not forget to do tc-scf first'
+
+  ! Numerical integration grid settings, forced by hand for this program
+  my_grid_becke  = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+!  my_n_pt_r_grid = 10 ! small grid for quick debug
+!  my_n_pt_a_grid = 26 ! small grid for quick debug
+  ! touch: tell IRPF90 that these variables were modified by hand
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  ! Print the Jastrow-related parameters in use
+  PROVIDE mu_erf 
+  print *, ' mu = ', mu_erf
+  PROVIDE j1b_type
+  print *, ' j1b_type = ', j1b_type
+  print *, j1b_pen
+
+  ! Work on the wave function stored on disk
+  read_wf = .true.
+  touch read_wf
+
+  call main()
+
+end
+
+! ---
+
+subroutine main()
+
+  ! Computes and prints
+  !
+  !   U_SOM = 1/2 sum_{I /= HF} <HF| H^tilde |I> <I| H^tilde |HF>
+  !
+  ! where I runs over the determinants of psi_det and HF is the determinant of psi_det
+  ! which is identical to HF_bitmask.
+  !
+  ! Stops with a message if HF_bitmask is not part of psi_det.
+
+  implicit none
+  integer          :: i, i_HF, degree
+  double precision :: hmono_1, htwoe_1, hthree_1, htot_1
+  double precision :: hmono_2, htwoe_2, hthree_2, htot_2
+  double precision :: U_SOM
+
+  PROVIDE N_int N_det
+
+  ! Locate the HF determinant in the wave function (0 <=> not found)
+  i_HF = 0
+  do i = 1, N_det
+    call get_excitation_degree(HF_bitmask, psi_det(1,1,i), degree, N_int)
+    if(degree == 0) then
+      i_HF = i
+      exit
+    endif
+  enddo
+
+  ! Without this guard i_HF would be used as an (undefined) index into psi_det below
+  if(i_HF == 0) then
+    print *, ' HF determinant not found in psi_det'
+    stop
+  endif
+
+  print *, ' HF determinants:', i_HF
+  print *, '          N_det :', N_det
+
+  U_SOM = 0.d0 
+  do i = 1, N_det
+    if(i == i_HF) cycle
+    ! htot_1 = <HF| H^tilde |I> ; htot_2 = <I| H^tilde |HF>  (H^tilde is not Hermitian: both are needed)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i_HF), psi_det(1,1,i), N_int, hmono_1, htwoe_1, hthree_1, htot_1)
+    call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,i_HF), N_int, hmono_2, htwoe_2, hthree_2, htot_2)
+    U_SOM += htot_1 * htot_2
+  enddo
+  U_SOM = 0.5d0 * U_SOM
+  print *, ' U_SOM = ', U_SOM
+  
+  return
+end subroutine main
+
+! ---
+
--- a/src/tc_bi_ortho/test_natorb.irp.f
+++ b/src/tc_bi_ortho/test_natorb.irp.f
@ -0,0 +1,51 @@
+program test_natorb
+  implicit none
+  BEGIN_DOC
+! Test program: reads the wave function (read_wf = .True.), fixes the integration grid by hand
+! and calls routine, which compares two diagonalizations of -one_e_dm_mo.
+  END_DOC
+  print *, 'Hello world'
+  ! Numerical integration grid settings, forced by hand for this test
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! touch: tell IRPF90 that these variables were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+ call routine
+! call test
+
+end
+
+subroutine routine
+ ! Diagonalizes mat_ref = -one_e_dm_mo in two ways, with diag_mat_per_fock_degen (driven by the
+ ! diagonal of Fock_matrix_mo) and with non_hrmt_bieig, then prints for each index i the two
+ ! eigenvalues and the dot products between the two sets of left / right eigenvectors.
+ implicit none
+ double precision, allocatable :: fock_diag(:),eigval(:),leigvec(:,:),reigvec(:,:),mat_ref(:,:)
+ allocate(eigval(mo_num),leigvec(mo_num,mo_num),reigvec(mo_num,mo_num),fock_diag(mo_num),mat_ref(mo_num, mo_num))
+ double precision, allocatable :: eigval_ref(:),leigvec_ref(:,:),reigvec_ref(:,:)
+ allocate(eigval_ref(mo_num),leigvec_ref(mo_num,mo_num),reigvec_ref(mo_num,mo_num))
+ 
+ double precision :: thr_deg
+ integer :: i,n_real,j
+ ! Diagonal of the Fock matrix, forwarded to diag_mat_per_fock_degen
+ print*,'fock_matrix'
+ do i = 1, mo_num
+  fock_diag(i) = Fock_matrix_mo(i,i)
+  print*,i,fock_diag(i)
+ enddo
+ thr_deg = 1.d-6
+ mat_ref = -one_e_dm_mo
+ print*,'diagonalization by block'
+ ! NOTE(review): diag_mat_per_fock_degen is called here with 7 arguments (a single threshold),
+ ! whereas the natorb_tc_reigvec_mo provider calls it with 9 (thr_d, thr_nd, thr_deg).
+ ! One of the two calls does not match the definition -- confirm and align.
+ call diag_mat_per_fock_degen(fock_diag,mat_ref,mo_num,thr_deg,leigvec,reigvec,eigval)
+ ! Reference diagonalization (n_real is an output of non_hrmt_bieig which is not used afterwards)
+ call non_hrmt_bieig( mo_num, mat_ref&
+                     , leigvec_ref, reigvec_ref& 
+                     , n_real, eigval_ref)
+ print*,'TEST ***********************************'
+ double precision :: accu_l, accu_r
+ ! For each index: eigenvalue from both methods and dot product of the corresponding eigenvectors
+ do i = 1, mo_num
+  accu_l = 0.d0
+  accu_r = 0.d0
+  do j = 1, mo_num
+   accu_r += reigvec_ref(j,i) * reigvec(j,i)
+   accu_l += leigvec_ref(j,i) * leigvec(j,i)
+  enddo
+  print*,i
+  write(*,'(I3,X,100(F16.10,X))')i,eigval(i),eigval_ref(i),accu_l,accu_r
+ enddo
+end
--- a/src/tc_bi_ortho/test_normal_order.irp.f
+++ b/src/tc_bi_ortho/test_normal_order.irp.f
@ -0,0 +1,131 @@
+program test_normal_order
+  implicit none
+  BEGIN_DOC
+! Test program: reads the wave function, fixes the integration grid by hand, calls
+! provide_all_three_ints_bi_ortho and then runs the comparisons of subroutine test.
+  END_DOC
+  print *, 'Hello world'
+  ! Numerical integration grid settings, forced by hand for this test
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! touch: tell IRPF90 that these variables were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+  call provide_all_three_ints_bi_ortho
+  call test
+end
+
+subroutine test
+ ! Compares, for double excitations built on ref_bitmask, the three-body part (hthree, multiplied
+ ! by the excitation phase) returned by htilde_mu_mat_bi_ortho with an effective two-body value:
+ !   - opposite spin : three_comp_two_e_elem
+ !   - same spin     : eff_2_e_from_3_e_aa / eff_2_e_from_3_e_bb
+ !
+ ! WARNING: the 'stop' placed after the opposite-spin section makes the two same-spin sections
+ ! unreachable as the code stands.
+ implicit none
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+ integer :: h1,h2,p1,p2,s1,s2,i_ok,degree,Ne(2)
+ integer                        :: exc(0:2,2,2)
+ integer(bit_kind), allocatable :: det_i(:,:)
+ double precision :: hmono,htwoe,hthree,htilde_ij,accu,phase,normal
+ integer,           allocatable :: occ(:,:)
+ allocate( occ(N_int*bit_kind_size,2) )
+ call bitstring_to_list_ab(ref_bitmask, occ, Ne, N_int)
+ allocate(det_i(N_int,2))
+ ! --- opposite spin: (h1 -> p1) with spin s1 = 1 and (h2 -> p2) with spin s2 = 2
+ s1 = 1 
+ s2 = 2
+ accu = 0.d0
+ ! NOTE(review): h1 carries spin s1 = 1 but runs up to elec_beta_num -- confirm this is intended
+ do h1 = 1, elec_beta_num
+  do p1 = elec_alpha_num+1, mo_num
+   do h2 = 1, elec_beta_num
+    do p2 = elec_beta_num+1, mo_num
+     det_i = ref_bitmask
+     ! NOTE(review): i_ok is not checked in this section, unlike in the same-spin sections below
+     call do_single_excitation(det_i,h1,p1,s1,i_ok)
+     call do_single_excitation(det_i,h2,p2,s2,i_ok)
+     ! NOTE(review): HF_bitmask is used here while the same-spin sections use ref_bitmask
+     call htilde_mu_mat_bi_ortho(det_i,HF_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+     call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+     call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+     hthree *= phase
+!    !normal = normal_two_body_bi_orth_ab(p2,h2,p1,h1)
+     call three_comp_two_e_elem(det_i,h1,h2,p1,p2,s1,s2,normal)
+!     normal = eff_2_e_from_3_e_ab(p2,p1,h2,h1)
+     accu += dabs(hthree-normal)
+    enddo
+   enddo
+  enddo
+ enddo
+print*,'accu opposite spin = ',accu
+! Everything below this stop is currently dead code
+stop
+
+!    p2=6
+!    p1=5
+!    h2=2
+!    h1=1
+
+! --- same spin, alpha-alpha: h1 < h2 and p1 < p2
+s1 = 1 
+s2 = 1
+accu = 0.d0
+do h1 = 1, elec_alpha_num
+ do p1 = elec_alpha_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_alpha_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    integer :: hh1, pp1, hh2, pp2, ss1, ss2
+    ! decoded holes/particles are only used in the error report below
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_aa(p2,p1,h2,h1)
+    ! negligible three-body terms are skipped; the first mismatch above 1.d-10 aborts the test
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin alpha = ',accu
+
+
+! --- same spin, beta-beta: h1 < h2 and p1 < p2
+s1 = 2 
+s2 = 2
+accu = 0.d0
+do h1 = 1, elec_beta_num
+ do p1 = elec_beta_num+1, mo_num
+  do p2 = p1+1, mo_num
+   do h2 = h1+1, elec_beta_num
+    det_i = ref_bitmask
+    call do_single_excitation(det_i,h1,p1,s1,i_ok)
+    if(i_ok.ne.1)cycle
+    call do_single_excitation(det_i,h2,p2,s2,i_ok)
+    if(i_ok.ne.1)cycle
+    call htilde_mu_mat_bi_ortho(det_i,ref_bitmask,N_int,hmono,htwoe,hthree,htilde_ij)
+    call get_excitation_degree(ref_bitmask,det_i,degree,N_int)
+    call get_excitation(ref_bitmask,det_i,exc,degree,phase,N_int)
+    call decode_exc(exc, 2, hh1, pp1, hh2, pp2, ss1, ss2)
+    hthree *= phase
+!    normal = normal_two_body_bi_orth_aa_bb(p2,h2,p1,h1)
+     normal = eff_2_e_from_3_e_bb(p2,p1,h2,h1)
+    ! negligible three-body terms are skipped; the first mismatch above 1.d-10 aborts the test
+    if(dabs(hthree).lt.1.d-10)cycle
+    if(dabs(hthree-normal).gt.1.d-10)then
+     print*,pp2,pp1,hh2,hh1
+     print*,p2,p1,h2,h1
+     print*,hthree,normal,dabs(hthree-normal)
+     stop
+    endif
+!     print*,hthree,normal,dabs(hthree-normal)
+    accu += dabs(hthree-normal)
+   enddo
+  enddo
+ enddo
+enddo
+print*,'accu same spin beta  = ',accu
+
+
+end
+
+
--- a/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/test_tc_bi_ortho.irp.f
@ -0,0 +1,254 @@
+program tc_bi_ortho
+  implicit none
+  BEGIN_DOC
+! Test program: reads the wave function, fixes the integration grid by hand and runs test_h_u0.
+! The other tests / timings of this file are available as commented-out calls.
+  END_DOC
+  print *, 'Hello world'
+  ! Numerical integration grid settings, forced by hand for this test
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! touch: tell IRPF90 that these variables were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+ call test_h_u0
+! call test_slater_tc_opt
+! call timing_tot
+! call timing_diag
+! call timing_single
+! call timing_double
+end
+
+subroutine test_h_u0
+ ! Applies H_tc_u_0_nstates_openmp to u_0 = psi_r_coef_bi_ortho(:,1), first with do_right = .True.
+ ! and then with do_right = .False., and compares the result element by element with
+ ! htc_bi_ortho_calc_tdav / htcdag_bi_ortho_calc_tdav respectively.
+ ! Prints every element and the accumulated absolute difference.
+ !
+ ! NOTE(review): all the work vectors have a single column of size N_det while N_states is passed
+ ! as the number of states -- this presumably assumes N_states == 1, to be confirmed.
+ implicit none
+ double precision, allocatable :: v_0_ref(:),v_0_new(:),u_0(:), v_0_ref_dagger(:)
+ double precision :: accu 
+ logical :: do_right
+ integer :: i
+ allocate(v_0_new(N_det),v_0_ref(N_det),u_0(N_det),v_0_ref_dagger(N_det))
+ ! --- right application
+ do_right = .True.
+ do i = 1, N_det
+  u_0(i) = psi_r_coef_bi_ortho(i,1)
+ enddo
+ call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
+ call htc_bi_ortho_calc_tdav (v_0_ref,u_0,N_states,N_det)
+ print*,'difference right '
+ accu = 0.d0
+ do i = 1, N_det
+  print*,dabs(v_0_new(i) - v_0_ref(i)),v_0_new(i) , v_0_ref(i)
+  accu += dabs(v_0_new(i) - v_0_ref(i))
+ enddo
+ print*,'accu = ',accu
+ ! --- left application (the same u_0, i.e. the right coefficients, is used as input)
+ do_right = .False.
+ v_0_new = 0.d0
+ call H_tc_u_0_nstates_openmp(v_0_new,u_0,N_states,N_det, do_right)
+ ! NOTE(review): this reference routine receives an extra do_right argument compared with the call
+ ! to htc_bi_ortho_calc_tdav above -- confirm against its definition
+ call htcdag_bi_ortho_calc_tdav(v_0_ref_dagger,u_0,N_states,N_det, do_right)
+ print*,'difference left'
+ accu = 0.d0
+ do i = 1, N_det
+  print*,dabs(v_0_new(i) - v_0_ref_dagger(i)),v_0_new(i) , v_0_ref_dagger(i)
+  accu += dabs(v_0_new(i) - v_0_ref_dagger(i))
+ enddo
+ print*,'accu = ',accu
+end
+
+subroutine test_slater_tc_opt
+ ! Checks htilde_mu_mat_opt_bi_ortho against htilde_mu_mat_bi_ortho on every pair of determinants
+ ! of psi_det. Only the pairs with |htot| > 1.d-15 are considered; the first pair whose total
+ ! matrix elements differ by more than 1.d-8 (or by more than |htot|) is printed and the program
+ ! stops. Otherwise the mean absolute difference is printed.
+ implicit none
+ integer          :: idet, jdet, degree
+ double precision :: hmono, htwoe, hthree, htot
+ double precision :: hnewmono, hnewtwoe, hnewthree, hnewtot
+ double precision :: n_nonzero, accu, diff_tot
+
+ accu      = 0.d0
+ n_nonzero = 0.d0
+
+ do idet = 1, N_det
+  do jdet = 1, N_det
+
+   call htilde_mu_mat_bi_ortho    (psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hnewmono, hnewtwoe, hnewthree, hnewtot)
+
+   ! vanishing reference elements are not part of the statistics
+   if(.not.(dabs(htot).gt.1.d-15)) cycle
+
+   n_nonzero += 1.D0
+   diff_tot = dabs(htot-hnewtot)
+   accu += diff_tot
+
+   if(diff_tot.gt.1.d-8 .or. diff_tot.gt.dabs(htot)) then
+    ! report the offending pair and abort
+    call get_excitation_degree(psi_det(1,1,jdet), psi_det(1,1,idet),degree,N_int)
+    print*,jdet,idet,degree
+    call debug_det(psi_det(1,1,idet),N_int)
+    call debug_det(psi_det(1,1,jdet),N_int)
+    print*,htot,hnewtot,dabs(htot-hnewtot)
+    print*,hthree,hnewthree,dabs(hthree-hnewthree)
+    stop
+   endif
+
+  enddo
+ enddo
+
+ print*,'accu   = ',accu/n_nonzero
+
+end
+
+subroutine timing_tot
+ ! Wall-clock timing, over all the N_det x N_det pairs of determinants, of
+ !   1) htilde_mu_mat_bi_ortho      ("old hij")
+ !   2) htilde_mu_mat_opt_bi_ortho  ("new hij")
+ !   3) i_H_j                       ("STANDARD")
+ implicit none
+ integer          :: idet, jdet
+ double precision :: t_start, t_stop, n_calls
+ double precision :: hmono, htwoe, hthree, htot
+
+ ! two calls outside of the timed regions, before any timing starts
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,2), N_int, hmono, htwoe, hthree, htot)
+
+ ! --- 1) reference TC routine
+ n_calls = 0.d0
+ call wall_time(t_start)
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   n_calls += 1.d0
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for old hij for total   = ',t_stop - t_start
+
+ ! --- 2) optimized TC routine
+ n_calls = 0.d0
+ call wall_time(t_start)
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   n_calls += 1.d0
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+  enddo
+ enddo
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for new hij for total   = ',t_stop - t_start
+
+ ! --- 3) i_H_j (one call outside of the timed region first)
+ call i_H_j(psi_det(1,1,1), psi_det(1,1,2),N_int,htot)
+ n_calls = 0.d0
+ call wall_time(t_start)
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   call i_H_j(psi_det(1,1,jdet), psi_det(1,1,idet),N_int,htot)
+   n_calls += 1.d0
+  enddo
+ enddo
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for new hij STANDARD    = ',t_stop - t_start
+
+end
+
+subroutine timing_diag
+ ! Wall-clock timing of the DIAGONAL matrix elements <I| H^tilde |I> over the determinants of
+ ! psi_det, with htilde_mu_mat_bi_ortho ("old hij") and htilde_mu_mat_opt_bi_ortho ("new hij").
+ implicit none
+ integer          :: idet
+ double precision :: t_start, t_stop, n_calls
+ double precision :: hmono, htwoe, hthree, htot
+
+ ! one call outside of the timed regions, before any timing starts
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+
+ ! --- reference routine
+ n_calls = 0.d0
+ call wall_time(t_start)
+ do idet = 1, N_det
+  n_calls += 1.d0
+  call htilde_mu_mat_bi_ortho(psi_det(1,1,idet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+ enddo
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for old hij for diagonal= ',t_stop - t_start
+
+ ! --- optimized routine
+ n_calls = 0.d0
+ call wall_time(t_start)
+ do idet = 1, N_det
+  n_calls += 1.d0
+  call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,idet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+ enddo
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for new hij for diagonal= ',t_stop - t_start
+
+end
+
+subroutine timing_single
+ ! Accumulated wall-clock time spent in htilde_mu_mat_bi_ortho ("old hij") and in
+ ! htilde_mu_mat_opt_bi_ortho ("new hij") over the pairs of determinants of psi_det which
+ ! differ by a SINGLE excitation. Only the matrix-element calls themselves are timed.
+ implicit none
+ integer          :: idet, jdet, degree
+ double precision :: t_start, t_stop, t_total, n_calls
+ double precision :: hmono, htwoe, hthree, htot
+
+ ! one call outside of the timed regions, before any timing starts
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+
+ ! --- reference routine
+ n_calls = 0.d0
+ t_total = 0.d0
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   call get_excitation_degree(psi_det(1,1,jdet), psi_det(1,1,idet),degree,N_int)
+   if(degree.ne.1)cycle
+   n_calls += 1.d0
+   call wall_time(t_start)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(t_stop)
+   t_total += t_stop - t_start
+  enddo
+ enddo
+ print*,'i_count = ',n_calls
+ print*,'time for old hij for singles = ',t_total
+
+ ! --- optimized routine
+ n_calls = 0.d0
+ t_total = 0.d0
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   call get_excitation_degree(psi_det(1,1,jdet), psi_det(1,1,idet),degree,N_int)
+   if(degree.ne.1)cycle
+   n_calls += 1.d0
+   call wall_time(t_start)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(t_stop)
+   t_total += t_stop - t_start
+  enddo
+ enddo
+ print*,'i_count = ',n_calls
+ print*,'time for new hij for singles = ',t_total
+
+end
+
+subroutine timing_double
+ ! Accumulated wall-clock time spent in htilde_mu_mat_bi_ortho ("old hij") and in
+ ! htilde_mu_mat_opt_bi_ortho ("new hij") over the pairs of determinants of psi_det which
+ ! differ by a DOUBLE excitation. Only the matrix-element calls themselves are timed.
+ implicit none
+ integer          :: idet, jdet, degree
+ double precision :: t_start, t_stop, t_total, n_calls
+ double precision :: hmono, htwoe, hthree, htot
+
+ ! one call outside of the timed regions, before any timing starts
+ call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,1), psi_det(1,1,1), N_int, hmono, htwoe, hthree, htot)
+
+ ! --- reference routine
+ n_calls = 0.d0
+ t_total = 0.d0
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   call get_excitation_degree(psi_det(1,1,jdet), psi_det(1,1,idet),degree,N_int)
+   if(degree.ne.2)cycle
+   n_calls += 1.d0
+   call wall_time(t_start)
+   call htilde_mu_mat_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(t_stop)
+   t_total += t_stop - t_start
+  enddo
+ enddo
+ print*,'i_count = ',n_calls
+ print*,'time for old hij for doubles = ',t_total
+
+ ! --- optimized routine
+ n_calls = 0.d0
+ t_total = 0.d0
+ do idet = 1, N_det
+  do jdet = 1, N_det
+   call get_excitation_degree(psi_det(1,1,jdet), psi_det(1,1,idet),degree,N_int)
+   if(degree.ne.2)cycle
+   n_calls += 1.d0
+   call wall_time(t_start)
+   call htilde_mu_mat_opt_bi_ortho(psi_det(1,1,jdet), psi_det(1,1,idet), N_int, hmono, htwoe, hthree, htot)
+   call wall_time(t_stop)
+   t_total += t_stop - t_start
+  enddo
+ enddo
+ ! extra clock read kept from the original sequence; its value is not used
+ call wall_time(t_stop)
+ print*,'i_count = ',n_calls
+ print*,'time for new hij for doubles = ',t_total
+
+end
+
--- a/src/tc_bi_ortho/test_tc_fock.irp.f
+++ b/src/tc_bi_ortho/test_tc_fock.irp.f
@ -0,0 +1,194 @@
+program test_tc_fock
+ implicit none
+  BEGIN_DOC
+! Test program: reads the wave function, fixes the integration grid by hand and runs routine_tot.
+! The other checks of this file (test_3e, routine_3, ...) are available as commented-out calls.
+  END_DOC
+  print *, 'Hello world'
+  ! Numerical integration grid settings, forced by hand for this test
+  my_grid_becke = .True.
+  my_n_pt_r_grid = 30
+  my_n_pt_a_grid = 50
+  read_wf = .True.
+  ! touch: tell IRPF90 that these variables were modified by hand
+  touch read_wf
+  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  !call routine_1
+  !call routine_2
+!  call routine_3()
+
+! call test_3e
+ call routine_tot
+end
+
+! ---
+
+subroutine test_3e
+ ! Prints, for the reference determinant (ref_bitmask):
+ !   - the one-, two-, three-body and total parts returned by htilde_mu_mat_bi_ortho
+ !   - tc_hf_one_e_energy, TC_HF_two_e_energy, diag_three_elem_hf and TC_HF_energy
+ !   - the values returned by give_aaa/aab/abb/bbb_contrib, their sum ('accu') and
+ !     'delta' = hthree - accu
+ implicit none
+ double precision :: integral_aaa,integral_aab,integral_abb,integral_bbb,accu
+ double precision ::  hmono, htwoe, hthree, htot
+ ! diagonal element <ref| H^tilde |ref>
+ call htilde_mu_mat_bi_ortho(ref_bitmask, ref_bitmask, N_int, hmono, htwoe, hthree, htot)
+! call diag_htilde_three_body_ints_bi_ort(N_int, ref_bitmask, hthree)
+ print*,'hmono = ',hmono
+ print*,'htwoe = ',htwoe
+ print*,'hthree= ',hthree
+ print*,'htot  = ',htot
+ print*,''
+ print*,''
+ ! the same quantities as obtained from the TC-HF energy providers
+ print*,'TC_one= ',tc_hf_one_e_energy
+ print*,'TC_two= ',TC_HF_two_e_energy
+ print*,'TC_3e = ',diag_three_elem_hf
+ print*,'TC_tot= ',TC_HF_energy
+ print*,''
+ print*,''
+ ! contributions returned by the give_*_contrib routines (names suggest a decomposition per spin
+ ! pattern -- TODO confirm against their definitions)
+ call give_aaa_contrib(integral_aaa)
+ print*,'integral_aaa = ',integral_aaa
+ call give_aab_contrib(integral_aab)
+ print*,'integral_aab = ',integral_aab
+ call give_abb_contrib(integral_abb)
+ print*,'integral_abb = ',integral_abb
+ call give_bbb_contrib(integral_bbb)
+ print*,'integral_bbb = ',integral_bbb
+ accu = integral_aaa + integral_aab + integral_abb + integral_bbb
+ print*,'accu = ',accu
+ print*,'delta = ',hthree - accu
+
+end
+
+subroutine routine_3()
+
+  ! For both spins and every single excitation i -> a built on ref_bitmask (i occupied, a virtual),
+  ! compares the three-body part of <det_i| H^tilde |ref> with fock_a_tot_3e_bi_orth(a,i) (alpha)
+  ! or fock_b_tot_3e_bi_orth(a,i) (beta).
+  !
+  ! The comparison is made on ABSOLUTE values: a sign mismatch is not detected.
+  ! The total matrix element of each excitation kept is written to Fortran unit 22.
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+
+  err_tot = 0.d0
+ 
+  do s1 = 1, 2
+
+   det_i = ref_bitmask
+   ! (the reference determinant is printed twice, before and after the label)
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+   do i = 1, elec_num_tab(s1)
+     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+ 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       ! i_ok == -1 is treated as a failed excitation
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       ! excitations with a negligible three-body term are skipped (and not written to unit 22)
+       if(dabs(hthree).lt.1.d-10)cycle
+       ref = hthree 
+       if(s1 == 1)then
+        new = fock_a_tot_3e_bi_orth(a,i)
+       else if(s1 == 2)then
+        new = fock_b_tot_3e_bi_orth(a,i)
+       endif
+       ! difference of the magnitudes only
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       ! unit 22 is not opened explicitly in this routine
+       write(22, *) htilde_ij
+     enddo
+   enddo
+  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_3
+
+! ---
+subroutine routine_tot()
+
+  ! For the beta single excitations i -> a built on ref_bitmask, with i in 1..elec_beta_num and
+  ! a in elec_beta_num+1..elec_alpha_num, compares the total matrix element <det_i| H^tilde |ref>
+  ! with Fock_matrix_tc_mo_beta(a,i) (Fock_matrix_tc_mo_alpha(a,i) if s1 is set to 1).
+  !
+  ! The comparison is made on ABSOLUTE values: a sign mismatch is not detected.
+  ! The total matrix element of each excitation kept is written to Fortran unit 22.
+
+  use bitmasks ! you need to include the bitmasks_module.f90 features
+
+  implicit none
+  integer                        :: i, a, i_ok, s1
+  double precision               :: hmono, htwoe, hthree, htilde_ij
+  double precision               :: err_ai, err_tot, ref, new
+  integer(bit_kind), allocatable :: det_i(:,:)
+
+  allocate(det_i(N_int,2))
+
+  err_tot = 0.d0
+ 
+!  do s1 = 1, 2
+   ! only the beta spin is tested
+   s1 = 2
+   det_i = ref_bitmask
+   ! (the reference determinant is printed twice, before and after the label)
+   call debug_det(det_i, N_int)
+   print*, ' HF det'
+   call debug_det(det_i, N_int)
+ 
+!   do i = 1, elec_num_tab(s1)
+!     do a = elec_num_tab(s1)+1, mo_num ! virtual 
+   do i = 1, elec_beta_num
+     do a = elec_beta_num+1, elec_alpha_num! virtual 
+!   do i = elec_beta_num+1, elec_alpha_num
+!     do a = elec_alpha_num+1, mo_num! virtual 
+       print*,i,a 
+ 
+       det_i = ref_bitmask
+       call do_single_excitation(det_i, i, a, s1, i_ok)
+       ! i_ok == -1 is treated as a failed excitation
+       if(i_ok == -1) then
+        print*, 'PB !!'
+        print*, i, a
+        stop
+       endif
+ 
+       call htilde_mu_mat_bi_ortho(det_i, ref_bitmask, N_int, hmono, htwoe, hthree, htilde_ij)
+       print*,htilde_ij
+       ! excitations with a negligible matrix element are skipped (and not written to unit 22)
+       if(dabs(htilde_ij).lt.1.d-10)cycle
+       print*, ' excited det'
+       call debug_det(det_i, N_int)
+
+       if(s1 == 1)then
+        new = Fock_matrix_tc_mo_alpha(a,i)
+       else
+        new = Fock_matrix_tc_mo_beta(a,i)
+       endif
+       ref = htilde_ij
+!       if(s1 == 1)then
+!        new = fock_a_tot_3e_bi_orth(a,i)
+!       else if(s1 == 2)then
+!        new = fock_b_tot_3e_bi_orth(a,i)
+!       endif
+       ! difference of the magnitudes only
+       err_ai = dabs(dabs(ref) - dabs(new))
+       if(err_ai .gt. 1d-7) then
+         print*,'s1 = ',s1
+         print*, ' warning on', i, a
+         print*, ref,new,err_ai
+       endif
+       print*, ref,new,err_ai
+       err_tot += err_ai
+ 
+       ! unit 22 is not opened explicitly in this routine
+       write(22, *) htilde_ij
+     enddo
+   enddo
+!  enddo
+
+  print *, ' err_tot = ', err_tot
+
+  deallocate(det_i)
+
+end subroutine routine_tot
--- a/src/tc_bi_ortho/u0_h_u0.irp.f
+++ b/src/tc_bi_ortho/u0_h_u0.irp.f
@ -0,0 +1,770 @@
+subroutine u_0_H_tc_u_0(e_0,u_0,n,keys_tmp,Nint,N_st,sze, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $E_0 = \frac{\langle u_0 | H_TC | u_0 \rangle}{\langle u_0 | u_0 \rangle}$
+  ! for each of the N_st states; a state of zero norm gets e_0 = 0.
+  !
+  ! n : number of determinants (keys_tmp is not referenced in the body)
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)             :: n,Nint, N_st, sze
+  logical, intent(in)             :: do_right
+  double precision, intent(out)   :: e_0(N_st)
+  double precision, intent(inout) :: u_0(sze,N_st)
+  integer(bit_kind),intent(in)    :: keys_tmp(Nint,2,n)
+
+  double precision, allocatable   :: v_0(:,:), u_1(:,:)
+  double precision                :: u_dot_u, u_dot_v, norm
+  integer                         :: i
+
+  allocate (v_0(n,N_st),u_1(n,N_st))
+  u_1(1:n,1:N_st) = u_0(1:n,1:N_st)   ! (n,N_st) copy: u_0 is declared (sze,N_st), the product is called with n
+  call H_tc_u_0_nstates_openmp(v_0,u_1,N_st,n, do_right)
+  u_0(1:n,1:N_st) = u_1(1:n,1:N_st)
+  deallocate(u_1)
+  !$OMP PARALLEL DO PRIVATE(i,norm) DEFAULT(SHARED)
+  do i=1,N_st
+    norm = u_dot_u(u_0(1,i),n)
+    if (norm /= 0.d0) then
+      ! Divide by norm itself, as in the documented quotient (not by its square root)
+      e_0(i) = u_dot_v(v_0(1,i),u_0(1,i),n) / norm
+    else
+      e_0(i) = 0.d0
+    endif
+  enddo
+  !$OMP END PARALLEL DO
+  deallocate (v_0)
+end
+
+
+subroutine H_tc_u_0_opt(v_0,u_0,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! TC Hamiltonian applied to u_0: $v_0 = H_TC | u_0\rangle$.
+  !
+  ! Forwards to H_tc_u_0_nstates_openmp with do_right = .True.
+  !
+  ! Assumes that the determinants are in psi_det
+  END_DOC
+  integer, intent(in)              :: N_st, sze
+  double precision, intent(inout)  :: v_0(sze,N_st)
+  double precision, intent(inout)  :: u_0(sze,N_st)
+
+  call H_tc_u_0_nstates_openmp(v_0, u_0, N_st, sze, .True.)
+end
+
+subroutine H_tc_dagger_u_0_opt(v_0,u_0,N_st,sze)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Transposed TC Hamiltonian applied to u_0: $v_0 = H_TC^T | u_0\rangle$.
+  !
+  ! Forwards to H_tc_u_0_nstates_openmp with do_right = .False.
+  !
+  ! Assumes that the determinants are in psi_det
+  END_DOC
+  integer, intent(in)              :: N_st, sze
+  double precision, intent(inout)  :: v_0(sze,N_st)
+  double precision, intent(inout)  :: u_0(sze,N_st)
+
+  call H_tc_u_0_nstates_openmp(v_0, u_0, N_st, sze, .False.)
+end
+
+
+subroutine H_tc_u_0_nstates_openmp(v_0,u_0,N_st,sze, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Applies the TC Hamiltonian to N_st vectors at once:
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  !
+  ! Assumes that the determinants are in psi_det
+  !
+  ! dset_order is applied to u_0 with psi_bilinear_matrix_order before the
+  ! product, and to both v_0 and u_0 with psi_bilinear_matrix_order_reverse
+  ! afterwards.
+  END_DOC
+  integer, intent(in)              :: N_st, sze
+  logical, intent(in)              :: do_right
+  double precision, intent(inout)  :: v_0(sze,N_st), u_0(sze,N_st)
+  double precision, allocatable    :: u_t(:,:), v_t(:,:)
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: u_t
+  integer                          :: istate
+
+  allocate(u_t(N_st,N_det), v_t(N_st,N_det))
+  provide mo_bi_ortho_tc_one_e mo_bi_ortho_tc_two_e
+  provide ref_tc_energy_tot fock_op_2_e_tc_closed_shell
+  provide eff_2_e_from_3_e_ab eff_2_e_from_3_e_aa eff_2_e_from_3_e_bb
+
+  ! Reorder every input vector with psi_bilinear_matrix_order
+  do istate = 1, N_st
+    call dset_order(u_0(1,istate), psi_bilinear_matrix_order, N_det)
+  enddo
+
+  ! u_t(N_st,N_det) is filled from u_0 ; v_t starts from zero
+  call dtranspose(u_0, size(u_0, 1), u_t, size(u_t, 1), N_det, N_st)
+  v_t = 0.d0
+  call H_tc_u_0_nstates_openmp_work(v_t, u_t, N_st, sze, 1, N_det, 0, 1, do_right)
+  deallocate(u_t)
+
+  ! v_0(sze,N_st) is filled from v_t
+  call dtranspose(v_t, size(v_t, 1), v_0, size(v_0, 1), N_st, N_det)
+  deallocate(v_t)
+
+  ! Apply psi_bilinear_matrix_order_reverse to the result and to the input
+  do istate = 1, N_st
+    call dset_order(v_0(1,istate), psi_bilinear_matrix_order_reverse, N_det)
+  enddo
+  do istate = 1, N_st
+    call dset_order(u_0(1,istate), psi_bilinear_matrix_order_reverse, N_det)
+  enddo
+
+
+end
+
+
+subroutine H_tc_u_0_nstates_openmp_work(v_t,u_t,N_st,sze,istart,iend,ishift,istep, do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Dispatches the product $v_t = H | u_t\rangle$ to the implementation
+  ! specialised for the current value of N_int (1, 2, 3 or 4), or to the
+  ! generic N_int version for any other value.
+  !
+  ! Default for istart,iend,ishift,istep should be 1,N_det,0,1
+  !
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)            :: N_st, sze
+  integer, intent(in)            :: istart, iend, ishift, istep
+  logical, intent(in)            :: do_right
+  double precision, intent(in)   :: u_t(N_st,N_det)
+  double precision, intent(out)  :: v_t(N_st,sze)
+
+  PROVIDE ref_bitmask_energy N_int
+
+  if (N_int == 1) then
+    call H_tc_u_0_nstates_openmp_work_1(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  else if (N_int == 2) then
+    call H_tc_u_0_nstates_openmp_work_2(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  else if (N_int == 3) then
+    call H_tc_u_0_nstates_openmp_work_3(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  else if (N_int == 4) then
+    call H_tc_u_0_nstates_openmp_work_4(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  else
+    call H_tc_u_0_nstates_openmp_work_N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  endif
+end
+BEGIN_TEMPLATE
+
+subroutine H_tc_u_0_nstates_openmp_work_$N_int(v_t,u_t,N_st,sze,istart,iend,ishift,istep,do_right)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Computes $v_t = H | u_t \\rangle$ 
+  !
+  ! Default for istart,iend,ishift,istep should be 1,N_det,0,1
+  ! v_t is only accumulated into here: the caller is expected to initialise it.
+  ! if do_right == True then you compute H_TC |Psi>, else H_TC^T |Psi>
+  END_DOC
+  integer, intent(in)            :: N_st,sze,istart,iend,ishift,istep
+  double precision, intent(in)   :: u_t(N_st,N_det)
+  logical, intent(in)             :: do_right 
+  double precision, intent(out)  :: v_t(N_st,sze)
+
+  double precision               :: hij
+  integer                        :: i,j,k,l,kk
+  integer                        :: k_a, k_b, l_a, l_b, m_a, m_b
+  integer                        :: istate
+  integer                        :: krow, kcol, krow_b, kcol_b
+  integer                        :: lrow, lcol
+  integer                        :: mrow, mcol                      ! NOTE(review): never referenced below
+  integer(bit_kind)              :: spindet($N_int)
+  integer(bit_kind)              :: tmp_det($N_int,2)
+  integer(bit_kind)              :: tmp_det2($N_int,2)
+  integer(bit_kind)              :: tmp_det3($N_int,2)              ! NOTE(review): never referenced below
+  integer(bit_kind), allocatable :: buffer(:,:)
+  integer                        :: n_doubles
+  integer, allocatable           :: doubles(:)
+  integer, allocatable           :: singles_a(:)
+  integer, allocatable           :: singles_b(:)
+  integer, allocatable           :: idx(:), idx0(:)
+  integer                        :: maxab, n_singles_a, n_singles_b, kcol_prev
+  integer*8                      :: k8
+  logical                        :: compute_singles
+  integer*8                      :: last_found, left, right, right_max
+  double precision               :: rss, mem, ratio
+  double precision, allocatable  :: utl(:,:)
+  integer, parameter             :: block_size=128                 ! number of columns of utl processed per block
+  logical                        :: u_is_sparse
+
+!  call resident_memory(rss)
+!  mem = dble(singles_beta_csc_size) / 1024.d0**3
+!
+!  compute_singles = (mem+rss > qp_max_mem)
+!
+!  if (.not.compute_singles) then
+!    provide singles_beta_csc
+!  endif
+compute_singles=.True.   ! hard-wired: the singles_beta_csc branch below is never taken
+
+
+  maxab = max(N_det_alpha_unique, N_det_beta_unique)+1   ! length of idx0 and of the per-thread work arrays
+  allocate(idx0(maxab))
+
+  do i=1,maxab
+    idx0(i) = i   ! identity index list passed to get_all_spin_singles
+  enddo
+
+  ! Prepare the array of all alpha single excitations
+  ! -------------------------------------------------
+
+  PROVIDE N_int nthreads_davidson
+  !$OMP PARALLEL DEFAULT(SHARED) NUM_THREADS(nthreads_davidson)        &
+      !$OMP   SHARED(psi_bilinear_matrix_rows, N_det,                &
+      !$OMP          psi_bilinear_matrix_columns,                    &
+      !$OMP          psi_det_alpha_unique, psi_det_beta_unique,      &
+      !$OMP          n_det_alpha_unique, n_det_beta_unique, N_int,   &
+      !$OMP          psi_bilinear_matrix_transp_rows,                &
+      !$OMP          psi_bilinear_matrix_transp_columns,             &
+      !$OMP          psi_bilinear_matrix_transp_order, N_st,         &
+      !$OMP          psi_bilinear_matrix_order_transp_reverse,       &
+      !$OMP          psi_bilinear_matrix_columns_loc,                &
+      !$OMP          psi_bilinear_matrix_transp_rows_loc,            &
+      !$OMP          istart, iend, istep, irp_here, v_t,             &
+      !$OMP          ishift, idx0, u_t, maxab, compute_singles,      &
+      !$OMP          singles_alpha_csc,singles_alpha_csc_idx,        &
+      !$OMP          singles_beta_csc,singles_beta_csc_idx)          &
+      !$OMP   PRIVATE(krow, kcol, tmp_det, spindet, k_a, k_b, i,     &
+      !$OMP          lcol, lrow, l_a, l_b, utl, kk, u_is_sparse,     &
+      !$OMP          buffer, doubles, n_doubles, umax,               &
+      !$OMP          tmp_det2, hij, idx, l, kcol_prev,hmono, htwoe, hthree,          &
+      !$OMP          singles_a, n_singles_a, singles_b, ratio,       &
+      !$OMP          n_singles_b, k8, last_found,left,right,right_max)
+
+  ! Alpha/Beta double excitations
+  ! =============================
+
+  allocate( buffer($N_int,maxab),                                     &
+      singles_a(maxab),                                              &
+      singles_b(maxab),                                              &
+      doubles(maxab),                                                &
+      idx(maxab), utl(N_st,block_size))
+
+  kcol_prev=-1   ! private to each thread: no beta column handled yet
+
+  ! Check if u has multiple zeros
+  kk=1 ! Avoid division by zero
+  !$OMP DO
+  do k=1,N_det
+    umax = 0.d0
+    do l=1,N_st
+      umax = max(umax, dabs(u_t(l,k)))
+    enddo
+    if (umax < 1.d-20) then
+      !$OMP ATOMIC
+      kk = kk+1
+    endif
+  enddo
+  !$OMP END DO
+  u_is_sparse = N_det / kk < 20  ! 5% ; NOTE(review): kk is PRIVATE, so each thread only counts the zeros met in its own share of the loop
+
+  ASSERT (iend <= N_det)
+  ASSERT (istart > 0)
+  ASSERT (istep  > 0)
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep    ! Loop over all determinants (/!\ not in psidet order)
+
+    krow = psi_bilinear_matrix_rows(k_a)       ! Index of alpha part of determinant k_a
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)    ! Index of beta part of determinant k_a
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+
+    if (kcol /= kcol_prev) then   ! beta part and its singles list are only refreshed when the column changes
+      tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+      if (compute_singles) then
+        call get_all_spin_singles_$N_int(                              &
+            psi_det_beta_unique, idx0,                                 &
+            tmp_det(1,2), N_det_beta_unique,                           &
+            singles_b, n_singles_b)
+      else
+        n_singles_b = 0
+        !DIR$ LOOP COUNT avg(1000)
+        do k8=singles_beta_csc_idx(kcol),singles_beta_csc_idx(kcol+1)-1
+          n_singles_b = n_singles_b+1
+          singles_b(n_singles_b) = singles_beta_csc(k8)
+        enddo
+      endif
+    endif
+    kcol_prev = kcol
+
+    ! -> Here, tmp_det is determinant k_a
+
+    ! Loop over singly excited beta columns
+    ! -------------------------------------
+
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b
+      lcol = singles_b(i)
+
+      tmp_det2(1:$N_int,2) = psi_det_beta_unique(1:$N_int, lcol)
+
+      ! tmp_det2 is a single excitation of tmp_det in the beta spin
+      ! the alpha part is not defined yet
+
+!---
+!      if (compute_singles) then
+
+        l_a = psi_bilinear_matrix_columns_loc(lcol)
+        ASSERT (l_a <= N_det)
+        ! rows  : | 1 2 3 4 | 1 3 4 6 | .... |  1  2  4  5 |
+        ! cols  : | 1 1 1 1 | 2 2 2 2 | .... |  8  8  8  8 |
+        ! index : | 1 2 3 4 | 5 6 7 8 | .... | 58 59 60 61 |
+        !                     ^                          ^
+        !                     |                          |
+        !                     l_a                       N_det
+        ! l_a is the index, in the big vector of size N_det, of the first element of column lcol
+
+        ! Below we identify all the determinants with the same beta part
+
+        !DIR$ UNROLL(8)
+        !DIR$ LOOP COUNT avg(50000)
+        do j=1,psi_bilinear_matrix_columns_loc(lcol+1) - psi_bilinear_matrix_columns_loc(lcol)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          buffer(1:$N_int,j) = psi_det_alpha_unique(1:$N_int, lrow)  ! hot spot
+
+          ASSERT (l_a <= N_det)
+          idx(j) = l_a
+          l_a = l_a+1
+        enddo
+        j = j-1   ! number of entries stored in buffer/idx by the loop above
+
+        ! Get all single excitations from tmp_det(1,1) to buffer(1,?)
+
+        call get_all_spin_singles_$N_int(                              &
+            buffer, idx, tmp_det(1,1), j,                              &
+            singles_a, n_singles_a )
+
+      ! Loop over alpha singles
+      ! -----------------------
+
+      double precision :: umax
+
+      !DIR$ LOOP COUNT avg(1000)
+      do k = 1,n_singles_a,block_size
+        umax = 0.d0
+        ! Prefetch u_t(:,l_a)
+        if (u_is_sparse) then
+          do kk=0,block_size-1
+            if (k+kk > n_singles_a) exit
+            l_a = singles_a(k+kk)
+            ASSERT (l_a <= N_det)
+
+            do l=1,N_st
+              utl(l,kk+1) = u_t(l,l_a)
+              umax = max(umax, dabs(utl(l,kk+1)))
+            enddo
+          enddo
+        else
+          do kk=0,block_size-1
+            if (k+kk > n_singles_a) exit
+            l_a = singles_a(k+kk)
+            ASSERT (l_a <= N_det)
+            utl(:,kk+1) = u_t(:,l_a)
+          enddo
+          umax = 1.d0   ! dense case: never skip the block
+        endif
+        if (umax < 1.d-20) cycle   ! every coefficient of this block is below the threshold
+
+        do kk=0,block_size-1
+          if (k+kk > n_singles_a) exit
+          l_a = singles_a(k+kk)
+          lrow = psi_bilinear_matrix_rows(l_a)
+          ASSERT (lrow <= N_det_alpha_unique)
+
+          tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij) ! double alpha-beta
+          if(do_right)then   ! the two determinants are passed in swapped order for the transposed operator
+           call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+          else
+           call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+          endif
+          !DIR$ LOOP COUNT AVG(4)
+          do l=1,N_st
+            v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+          enddo
+        enddo
+      enddo
+
+    enddo
+
+  enddo
+  !$OMP END DO
+
+  !$OMP DO SCHEDULE(guided,64)
+  do k_a=istart+ishift,iend,istep
+
+
+    ! Single and double alpha excitations
+    ! ===================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! ----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,1)
+
+    ! Loop inside the beta column to gather all the connected alphas
+    lcol = psi_bilinear_matrix_columns(k_a)
+    l_a = psi_bilinear_matrix_columns_loc(lcol)
+
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_alpha_unique
+      if (l_a > N_det) exit
+      lcol = psi_bilinear_matrix_columns(l_a)
+      if (lcol /= kcol) exit
+      lrow = psi_bilinear_matrix_rows(l_a)
+      ASSERT (lrow <= N_det_alpha_unique)
+
+      buffer(1:$N_int,i) = psi_det_alpha_unique(1:$N_int, lrow) ! Hot spot
+      idx(i) = l_a
+      l_a = l_a+1
+    enddo
+    i = i-1   ! number of entries stored in buffer/idx by the loop above
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_a, doubles, n_singles_a, n_doubles )
+
+    ! Compute Hij for all alpha singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)   ! beta part stays that of k_a for the alpha singles and doubles
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_a,block_size
+      umax = 0.d0
+      ! Prefetch u_t(:,l_a)
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_singles_a) exit
+          l_a = singles_a(i+kk)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_singles_a) exit
+          l_a = singles_a(i+kk)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_a) exit
+        l_a = singles_a(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, lrow)
+!        call i_h_j_single_spin( tmp_det, tmp_det2, $N_int, 1, hij)
+         if(do_right)then
+          call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+         else
+          call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+         endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Compute Hij for all alpha doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      umax = 0.d0
+      ! Prefetch u_t(:,l_a)
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_a = doubles(i+kk)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_a = doubles(i+kk)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_a = doubles(i+kk)
+        lrow = psi_bilinear_matrix_rows(l_a)
+        ASSERT (lrow <= N_det_alpha_unique)
+
+        tmp_det2(1:N_int,1) = psi_det_alpha_unique(1:N_int, lrow) ! NOTE(review): runtime N_int here, template parameter in the sibling copies
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij)
+!        call i_H_j_double_spin( tmp_det(1,1), psi_det_alpha_unique(1, lrow), $N_int, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Single and double beta excitations
+    ! ==================================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    kcol = psi_bilinear_matrix_columns(k_a)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    spindet(1:$N_int) = tmp_det(1:$N_int,2)
+
+    ! Initial determinant is at k_b in beta-major representation
+    ! -----------------------------------------------------------------------
+
+    k_b = psi_bilinear_matrix_order_transp_reverse(k_a)
+    ASSERT (k_b <= N_det)
+
+    ! Loop inside the alpha row to gather all the connected betas
+    lrow = psi_bilinear_matrix_transp_rows(k_b)
+    l_b = psi_bilinear_matrix_transp_rows_loc(lrow)
+    !DIR$ LOOP COUNT avg(200000)
+    do i=1,N_det_beta_unique
+      if (l_b > N_det) exit
+      lrow = psi_bilinear_matrix_transp_rows(l_b)
+      if (lrow /= krow) exit
+      lcol = psi_bilinear_matrix_transp_columns(l_b)
+      ASSERT (lcol <= N_det_beta_unique)
+
+      buffer(1:$N_int,i) = psi_det_beta_unique(1:$N_int, lcol)
+      idx(i) = l_b
+      l_b = l_b+1
+    enddo
+    i = i-1   ! number of entries stored in buffer/idx by the loop above
+
+    call get_all_spin_singles_and_doubles_$N_int(                    &
+        buffer, idx, spindet, i,                                     &
+        singles_b, doubles, n_singles_b, n_doubles )
+
+    ! Compute Hij for all beta singles
+    ! ----------------------------------
+
+    tmp_det2(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)   ! alpha part stays that of k_a for the beta singles and doubles
+    !DIR$ LOOP COUNT avg(1000)
+    do i=1,n_singles_b,block_size
+      umax = 0.d0
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_singles_b) exit
+          l_b = singles_b(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)   ! u_t is indexed with l_a, obtained from l_b through this map
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_singles_b) exit
+          l_b = singles_b(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_singles_b) exit
+        l_b = singles_b(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:$N_int,2) = psi_det_beta_unique (1:$N_int, lcol)
+!        call i_H_j_single_spin( tmp_det, tmp_det2, $N_int, 2, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+    ! Compute Hij for all beta doubles
+    ! ----------------------------------
+
+    !DIR$ LOOP COUNT avg(50000)
+    do i=1,n_doubles,block_size
+      umax = 0.d0
+      if (u_is_sparse) then
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_b = doubles(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          do l=1,N_st
+            utl(l,kk+1) = u_t(l,l_a)
+            umax = max(umax, dabs(utl(l,kk+1)))
+          enddo
+        enddo
+      else
+        do kk=0,block_size-1
+          if (i+kk > n_doubles) exit
+          l_b = doubles(i+kk)
+          l_a = psi_bilinear_matrix_transp_order(l_b)
+          ASSERT (l_b <= N_det)
+          ASSERT (l_a <= N_det)
+          utl(:,kk+1) = u_t(:,l_a)
+        enddo
+        umax = 1.d0
+      endif
+      if (umax < 1.d-20) cycle
+
+      do kk=0,block_size-1
+        if (i+kk > n_doubles) exit
+        l_b = doubles(i+kk)
+        l_a = psi_bilinear_matrix_transp_order(l_b)
+        lcol = psi_bilinear_matrix_transp_columns(l_b)
+        ASSERT (lcol <= N_det_beta_unique)
+
+        tmp_det2(1:N_int,2) = psi_det_beta_unique(1:N_int, lcol) ! NOTE(review): runtime N_int here, template parameter in the sibling copies
+!        call i_H_j( tmp_det, tmp_det2, $N_int, hij)
+!        call i_H_j_double_spin( tmp_det(1,2), psi_det_beta_unique(1, lcol), $N_int, hij)
+        if(do_right)then
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det,tmp_det2,$N_int,hij)
+        else
+         call htilde_mu_mat_opt_bi_ortho_tot(tmp_det2,tmp_det,$N_int,hij)
+        endif
+
+        !DIR$ LOOP COUNT AVG(4)
+        do l=1,N_st
+          v_t(l,k_a) = v_t(l,k_a) + hij * utl(l,kk+1)
+        enddo
+      enddo
+    enddo
+
+
+    ! Diagonal contribution
+    ! =====================
+
+
+    ! Initial determinant is at k_a in alpha-major representation
+    ! -----------------------------------------------------------------------
+
+    if (u_is_sparse) then
+      umax = 0.d0
+      do l=1,N_st
+        umax = max(umax, dabs(u_t(l,k_a)))
+      enddo
+    else
+      umax = 1.d0
+    endif
+    if (umax < 1.d-20) cycle   ! coefficient of k_a is below the threshold for every state: skip the diagonal term
+
+    krow = psi_bilinear_matrix_rows(k_a)
+    ASSERT (krow <= N_det_alpha_unique)
+
+    kcol = psi_bilinear_matrix_columns(k_a)
+    ASSERT (kcol <= N_det_beta_unique)
+
+    tmp_det(1:$N_int,1) = psi_det_alpha_unique(1:$N_int, krow)
+    tmp_det(1:$N_int,2) = psi_det_beta_unique (1:$N_int, kcol)
+
+    double precision, external :: diag_H_mat_elem   ! only referenced by the commented-out call below
+    double precision :: hmono, htwoe, hthree
+
+!    hij = diag_H_mat_elem(tmp_det,$N_int)
+    call diag_htilde_mu_mat_fock_bi_ortho  ($N_int, tmp_det, hmono, htwoe, hthree, hij)   ! only hij is used afterwards
+    !DIR$ LOOP COUNT AVG(4)
+    do l=1,N_st
+      v_t(l,k_a) = v_t(l,k_a) + hij * u_t(l,k_a)
+    enddo
+
+  end do
+  !$OMP END DO
+  deallocate(buffer, singles_a, singles_b, doubles, idx, utl)   ! arrays allocated inside the parallel region (all PRIVATE)
+  !$OMP END PARALLEL
+
+end
+
+SUBST [ N_int ]
+
+1;;
+2;;
+3;;
+4;;
+N_int;;
+
+END_TEMPLATE
+
+
--- a/src/tc_scf/11.tc_scf.bats
+++ b/src/tc_scf/11.tc_scf.bats
@ -9,11 +9,13 @@ function run_Ne() {
  echo Ne > Ne.xyz
  qp create_ezfio -b cc-pcvdz Ne.xyz -o Ne_tc_scf
  qp run scf 
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5] 
  qp set tc_keywords bi_ortho True 
  qp set tc_keywords test_cycle_tc True
-  qp set ao_two_e_erf_ints mu_erf 0.87 
-  qp set tc_keywords j1b_pen [1.5]
-  qp set tc_keywords j1b_type 3 
+
  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out 
  eref=-128.552134
  energy="$(qp get tc_scf bitc_energy)"
@ -25,3 +27,75 @@ function run_Ne() {
 run_Ne 
 }

+function run_C() {
+  rm -rf C_tc_scf                                      # start from a clean EZFIO directory
+  echo C  > C.xyz                                      # input file holding only the element symbol
+  qp create_ezfio -b cc-pcvdz C.xyz -o C_tc_scf -m 3   # presumably -m 3 selects the spin multiplicity - confirm
+  qp run scf                                           # SCF is run before any TC keyword is set
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5]                     # a single entry here; run_ch2 passes three - presumably one per atom
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out         # output is shown and copied to ${EZFIO_FILE}.tc_scf.out
+  eref=-37.691254356408791                             # reference value for tc_scf bitc_energy
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6                                # eq is defined outside this view; presumably a comparison with tolerance 1e-6
+}
+
+
+@test "C" {
+ run_C   # the whole test is the run_C function defined above
+}
+
+
+function run_O() {
+  rm -rf O_tc_scf                                      # start from a clean EZFIO directory
+  echo O  > O.xyz                                      # input file holding only the element symbol
+  qp create_ezfio -b cc-pcvdz O.xyz -o O_tc_scf -m 3   # presumably -m 3 selects the spin multiplicity - confirm
+  qp run scf                                           # SCF is run before any TC keyword is set
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen [1.5]                     # a single entry here; run_ch2 passes three - presumably one per atom
+  qp set tc_keywords bi_ortho True 
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out         # output is shown and copied to ${EZFIO_FILE}.tc_scf.out
+  eref=-74.814687229354590                             # reference value for tc_scf bitc_energy
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6                                # eq is defined outside this view; presumably a comparison with tolerance 1e-6
+}
+
+
+@test "O" {
+ run_O   # the whole test is the run_O function defined above
+}
+
+
+
+function run_ch2() {
+  rm -rf ch2_tc_scf                                    # start from a clean EZFIO directory
+  cp ${QP_ROOT}/tests/input/ch2.xyz .                  # geometry comes from the shared test inputs under QP_ROOT
+  qp create_ezfio -b "C:cc-pcvdz|H:cc-pvdz" ch2.xyz -o ch2_tc_scf    # a different basis is given for C and for H
+  qp run scf                                           # SCF is run before any TC keyword is set
+
+  qp set ao_two_e_erf_ints mu_erf 0.87
+  qp set tc_keywords j1b_type 3
+  qp set tc_keywords j1b_pen '[1.5,10000,10000]'       # three entries - presumably one per atom of ch2.xyz, confirm the ordering
+  qp set tc_keywords bi_ortho True
+  qp set tc_keywords test_cycle_tc True
+
+  qp run tc_scf | tee ${EZFIO_FILE}.tc_scf.out         # output is shown and copied to ${EZFIO_FILE}.tc_scf.out
+  eref=-38.903247818077737                             # reference value for tc_scf bitc_energy
+  energy="$(qp get tc_scf bitc_energy)"
+  eq $energy $eref 1e-6                                # eq is defined outside this view; presumably a comparison with tolerance 1e-6
+}
+
+
+@test "ch2" {
+ run_ch2   # the whole test is the run_ch2 function defined above
+}
+
--- a/src/tc_scf/NEED
+++ b/src/tc_scf/NEED
@ -1,6 +1,6 @@
 hartree_fock
 bi_ortho_mos
-three_body_ints
+ortho_three_e_ints
 bi_ort_ints
 tc_keywords
 non_hermit_dav
--- a/src/tc_scf/diago_bi_ort_tcfock.irp.f
+++ b/src/tc_scf/diago_bi_ort_tcfock.irp.f
@ -38,33 +38,9 @@
                     , fock_tc_leigvec_mo, fock_tc_reigvec_mo                       & 
                     , n_real_tc, eigval_right_tmp )

-  !if(max_ov_tc_scf)then
-  ! call non_hrmt_fock_mat( mo_num, F_tmp, thresh_biorthog_diag, thresh_biorthog_nondiag &
-  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo                          & 
-  !                    , n_real_tc, eigval_right_tmp )
-  !else 
-  ! call non_hrmt_diag_split_degen_bi_orthog( mo_num, F_tmp     &
-  !                    , fock_tc_leigvec_mo, fock_tc_reigvec_mo & 
-  !                    , n_real_tc, eigval_right_tmp )
-  !endif
-
  deallocate(F_tmp)

-
-!  if(n_real_tc .ne. mo_num)then
-!   print*,'n_real_tc ne mo_num ! ',n_real_tc
-!   stop
-!  endif
-
  eigval_fock_tc_mo = eigval_right_tmp
-!  print*,'Eigenvalues of Fock_matrix_tc_mo_tot'
-!  do i = 1, elec_alpha_num
-!    print*, i, eigval_fock_tc_mo(i)
-!  enddo
-!  do i = elec_alpha_num+1, mo_num 
-!    print*, i, eigval_fock_tc_mo(i) - level_shift_tcscf
-!  enddo
-!  deallocate( eigval_right_tmp )

  ! L.T x R 
  call dgemm( "T", "N", mo_num, mo_num, mo_num, 1.d0          &
--- a/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
+++ b/src/tc_scf/fock_3e_bi_ortho_uhf.irp.f
@ -49,6 +49,11 @@ END_PROVIDER

 BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_a, (mo_num, mo_num)]

+  BEGIN_DOC
+! ALPHA part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC
  implicit none
  integer          :: a, b, i, j, o
  double precision :: I_bij_aij, I_bij_ija, I_bij_jai, I_bij_aji, I_bij_iaj, I_bij_jia
@ -145,6 +150,11 @@ END_PROVIDER
 ! ---

 BEGIN_PROVIDER [double precision, fock_3e_uhf_mo_b, (mo_num, mo_num)]
+  BEGIN_DOC
+! BETA part of the Fock matrix from three-electron terms 
+!
+! WARNING :: non hermitian if bi-ortho MOS used 
+  END_DOC

  implicit none
  integer          :: a, b, i, j, o
--- a/src/tc_scf/fock_for_right.irp.f
+++ b/src/tc_scf/fock_for_right.irp.f
--- a/src/tc_scf/fock_tc.irp.f
+++ b/src/tc_scf/fock_tc.irp.f
@ -6,10 +6,11 @@

  BEGIN_DOC
  !
-  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> 
+  ! two_e_tc_non_hermit_integral_seq_alpha(k,i) = <k| F^tc_alpha |i> ON THE AO BASIS 
  !
-  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions
  !
+  ! works in SEQUENTIAL 
  END_DOC

  implicit none
@ -17,8 +18,6 @@
  double precision :: density, density_a, density_b
  double precision :: t0, t1

-  !print*, ' providing two_e_tc_non_hermit_integral_seq ...'
-  !call wall_time(t0)

  two_e_tc_non_hermit_integral_seq_alpha = 0.d0
  two_e_tc_non_hermit_integral_seq_beta  = 0.d0
@ -32,24 +31,6 @@
          density_b = TCSCF_density_matrix_ao_beta (l,j)
          density   = density_a + density_b

-          !!                                         rho(l,j)   *      < k l| T | i j>
-          !two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !!                                         rho(l,j)   *      < k l| T | i j>
-          !two_e_tc_non_hermit_integral_seq_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !!                                         rho_a(l,j) *      < l k| T | i j>
-          !two_e_tc_non_hermit_integral_seq_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !!                                         rho_b(l,j) *      < l k| T | i j>
-          !two_e_tc_non_hermit_integral_seq_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
-
-          !!                                         rho(l,j)   *      < k l| T | i j>
-          !two_e_tc_non_hermit_integral_alpha(k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !!                                         rho(l,j)   *      < k l| T | i j>
-          !two_e_tc_non_hermit_integral_beta (k,i) += density   * ao_two_e_tc_tot(l,j,k,i)
-          !!                                         rho_a(l,j) *      < l k| T | i j>
-          !two_e_tc_non_hermit_integral_alpha(k,i) -= density_a * ao_two_e_tc_tot(k,j,l,i)
-          !!                                         rho_b(l,j) *      < l k| T | i j>
-          !two_e_tc_non_hermit_integral_beta (k,i) -= density_b * ao_two_e_tc_tot(k,j,l,i)
-
          !                                         rho(l,j)   *      < k l| T | i j>
          two_e_tc_non_hermit_integral_seq_alpha(k,i) += density   * ao_two_e_tc_tot(k,i,l,j)
          !                                         rho(l,j)   *      < k l| T | i j>
@ -64,8 +45,6 @@
    enddo
  enddo

-  !call wall_time(t1)
-  !print*, ' wall time for two_e_tc_non_hermit_integral_seq after = ', t1 - t0

 END_PROVIDER 

@ -76,9 +55,9 @@ END_PROVIDER

  BEGIN_DOC
  !
-  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> 
+  ! two_e_tc_non_hermit_integral_alpha(k,i) = <k| F^tc_alpha |i> ON THE AO BASIS 
  !
-  ! where F^tc is the two-body part of the TC Fock matrix and k,i are AO basis functions
+  ! where F^tc is the TWO-BODY part of the TC Fock matrix and k,i are AO basis functions
  !
  END_DOC

@ -88,8 +67,6 @@ END_PROVIDER
  double precision              :: t0, t1
  double precision, allocatable :: tmp_a(:,:), tmp_b(:,:)

-  !print*, ' providing two_e_tc_non_hermit_integral ...'
-  !call wall_time(t0)

  two_e_tc_non_hermit_integral_alpha = 0.d0
  two_e_tc_non_hermit_integral_beta  = 0.d0
@ -135,8 +112,6 @@ END_PROVIDER
  deallocate(tmp_a, tmp_b)
 !$OMP END PARALLEL

-  !call wall_time(t1)
-  !print*, ' wall time for two_e_tc_non_hermit_integral after = ', t1 - t0

 END_PROVIDER 

@ -181,14 +156,6 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_alpha, (mo_num, mo_num) ]

  if(bi_ortho) then

-    !allocate(tmp(ao_num,ao_num))
-    !tmp = Fock_matrix_tc_ao_alpha
-    !if(three_body_h_tc) then
-    !  tmp += fock_3e_uhf_ao_a
-    !endif
-    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1))
-    !deallocate(tmp)
-
    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_alpha, size(Fock_matrix_tc_ao_alpha, 1) &
                          , Fock_matrix_tc_mo_alpha, size(Fock_matrix_tc_mo_alpha, 1) )
    if(three_body_h_tc) then
@ -217,14 +184,6 @@ BEGIN_PROVIDER [ double precision, Fock_matrix_tc_mo_beta, (mo_num,mo_num) ]

  if(bi_ortho) then

-    !allocate(tmp(ao_num,ao_num))
-    !tmp = Fock_matrix_tc_ao_beta
-    !if(three_body_h_tc) then
-    !  tmp += fock_3e_uhf_ao_b
-    !endif
-    !call ao_to_mo_bi_ortho(tmp, size(tmp, 1), Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1))
-    !deallocate(tmp)
-
    call ao_to_mo_bi_ortho( Fock_matrix_tc_ao_beta, size(Fock_matrix_tc_ao_beta, 1) &
                          , Fock_matrix_tc_mo_beta, size(Fock_matrix_tc_mo_beta, 1) )
    if(three_body_h_tc) then
--- a/Show More
+++ b/Show More