Merge branch 'dev-stable-tc-scf' into dev-stable-tc-scf

2025-04-27 10:44:43 +02:00 · 2023-04-15 01:07:29 +02:00 · 2023-04-15 01:07:29 +02:00 · fb99d995e1
commit fb99d995e1
parent 3d1c307008 e4664975e1
24 changed files with 1361 additions and 360 deletions
--- a/src/bi_ortho_mos/overlap.irp.f
+++ b/src/bi_ortho_mos/overlap.irp.f
@ -12,32 +12,27 @@
  double precision              :: accu_d, accu_nd 
  double precision, allocatable :: tmp(:,:)
  
- ! TODO : re do the DEGEMM
+!  overlap_bi_ortho = 0.d0
+!  do i = 1, mo_num
+!    do k = 1, mo_num
+!      do m = 1, ao_num
+!        do n = 1, ao_num
+!          overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i)
+!        enddo
+!      enddo
+!    enddo
+!  enddo

-  overlap_bi_ortho = 0.d0
-  do i = 1, mo_num
-    do k = 1, mo_num
-      do m = 1, ao_num
-        do n = 1, ao_num
-          overlap_bi_ortho(k,i) += ao_overlap(n,m) * mo_l_coef(n,k) * mo_r_coef(m,i)
-        enddo
-      enddo
-    enddo
-  enddo
-
-!  allocate( tmp(mo_num,ao_num) )
-!
-!  ! tmp <-- L.T x S_ao
-!  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                         & 
-!            , mo_l_coef, size(mo_l_coef, 1), ao_overlap, size(ao_overlap, 1) &
-!            , 0.d0, tmp, size(tmp, 1) )
-!
-!  ! S <-- tmp x R
-!  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0           & 
-!            , tmp, size(tmp, 1), mo_r_coef, size(mo_r_coef, 1) &
-!            , 0.d0, overlap_bi_ortho, size(overlap_bi_ortho, 1) )
-!
-!  deallocate( tmp )
+  allocate( tmp(mo_num,ao_num) )
+  ! tmp <-- L.T x S_ao
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                   & 
+            , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) &
+            , 0.d0, tmp(1,1), size(tmp, 1) )
+  ! S <-- tmp x R
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                     & 
+            , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) &
+            , 0.d0, overlap_bi_ortho(1,1), size(overlap_bi_ortho, 1) )
+  deallocate(tmp)

  do i = 1, mo_num
    overlap_diag_bi_ortho(i) = overlap_bi_ortho(i,i)
@ -84,20 +79,41 @@ END_PROVIDER
  END_DOC

  implicit none
-  integer :: i, j, p, q
+  integer                       :: i, j, p, q
+  double precision, allocatable :: tmp(:,:)

-  overlap_mo_r = 0.d0
-  overlap_mo_l = 0.d0
-  do i = 1, mo_num
-    do j = 1, mo_num
-      do p = 1, ao_num
-        do q = 1, ao_num
-          overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) 
-          overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p)
-        enddo
-      enddo
-    enddo
-  enddo
+  !overlap_mo_r = 0.d0
+  !overlap_mo_l = 0.d0
+  !do i = 1, mo_num
+  !  do j = 1, mo_num
+  !    do p = 1, ao_num
+  !      do q = 1, ao_num
+  !        overlap_mo_r(j,i) += mo_r_coef(q,i) * mo_r_coef(p,j) * ao_overlap(q,p) 
+  !        overlap_mo_l(j,i) += mo_l_coef(q,i) * mo_l_coef(p,j) * ao_overlap(q,p)
+  !      enddo
+  !    enddo
+  !  enddo
+  !enddo
+
+  allocate( tmp(mo_num,ao_num) )
+
+  tmp = 0.d0
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                   & 
+            , mo_r_coef(1,1), size(mo_r_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) &
+            , 0.d0, tmp(1,1), size(tmp, 1) )
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                     & 
+            , tmp(1,1), size(tmp, 1), mo_r_coef(1,1), size(mo_r_coef, 1) &
+            , 0.d0, overlap_mo_r(1,1), size(overlap_mo_r, 1) )
+
+  tmp = 0.d0
+  call dgemm( "T", "N", mo_num, ao_num, ao_num, 1.d0                                   & 
+            , mo_l_coef(1,1), size(mo_l_coef, 1), ao_overlap(1,1), size(ao_overlap, 1) &
+            , 0.d0, tmp(1,1), size(tmp, 1) )
+  call dgemm( "N", "N", mo_num, mo_num, ao_num, 1.d0                     & 
+            , tmp(1,1), size(tmp, 1), mo_l_coef(1,1), size(mo_l_coef, 1) &
+            , 0.d0, overlap_mo_l(1,1), size(overlap_mo_l, 1) )
+
+  deallocate(tmp)

 END_PROVIDER 

--- a/src/davidson/EZFIO.cfg
+++ b/src/davidson/EZFIO.cfg
@ -1,71 +1,18 @@
-[threshold_davidson]
-type: Threshold
-doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
-interface: ezfio,provider,ocaml
-default: 1.e-10
-
-[threshold_nonsym_davidson]
-type: Threshold
-doc: Thresholds of non-symetric Davidson's algorithm
-interface: ezfio,provider,ocaml
-default: 1.e-10
-
-[threshold_davidson_from_pt2]
-type: logical
-doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2
-interface: ezfio,provider,ocaml
-default: false
-
-[n_states_diag]
-type: States_number
-doc: Controls the number of states to consider during the Davdison diagonalization. The number of states is n_states * n_states_diag
-default: 4
-interface: ezfio,ocaml
-
-[davidson_sze_max]
-type: Strictly_positive_int
-doc: Number of micro-iterations before re-contracting
-default: 15
-interface: ezfio,provider,ocaml
-
-[state_following]
-type: logical
-doc: If |true|, the states are re-ordered to match the input states
-default: False
-interface: ezfio,provider,ocaml
-
-[disk_based_davidson]
-type: logical
-doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is available
-default: True
-interface: ezfio,provider,ocaml
-
 [csf_based]
 type: logical
 doc: If |true|, use the CSF-based algorithm
 default: False
 interface: ezfio,provider,ocaml

-[distributed_davidson]
-type: logical
-doc: If |true|, use the distributed algorithm
-default: True 
-interface: ezfio,provider,ocaml
-
 [only_expected_s2]
 type: logical
 doc: If |true|, use filter out all vectors with bad |S^2| values
 default: True
 interface: ezfio,provider,ocaml

-[n_det_max_full]
-type: Det_number_max
-doc: Maximum number of determinants where |H| is fully diagonalized 
-interface: ezfio,provider,ocaml
-default: 1000
-
 [without_diagonal]
 type: logical
 doc: If |true|, don't use denominator
 default: False
 interface: ezfio,provider,ocaml
+
--- a/src/davidson/NEED
+++ b/src/davidson/NEED
@ -1 +1,2 @@
 csf
+davidson_keywords
--- a/src/davidson/davidson_parallel.irp.f
+++ b/src/davidson/davidson_parallel.irp.f
@ -546,21 +546,6 @@ end



-BEGIN_PROVIDER [ integer, nthreads_davidson ]
- implicit none
- BEGIN_DOC
- ! Number of threads for Davidson
- END_DOC
- nthreads_davidson = nproc
- character*(32) :: env
- call getenv('QP_NTHREADS_DAVIDSON',env)
- if (trim(env) /= '') then
-   read(env,*) nthreads_davidson
-   call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
- endif
-END_PROVIDER
-
-
 integer function zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id)
  use f77_zmq
  implicit none
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@ -14,15 +14,6 @@ BEGIN_PROVIDER [ character*(64), diag_algorithm ]
  endif
 END_PROVIDER

-BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ]
- implicit none
- BEGIN_DOC
- ! Threshold of Davidson's algorithm, using PT2 as a guide
- END_DOC
- threshold_davidson_pt2 = threshold_davidson
-
-END_PROVIDER
-


 BEGIN_PROVIDER [ integer, dressed_column_idx, (N_states) ]
@ -66,7 +57,7 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
  double precision, allocatable  :: H_jj(:)

  double precision, external     :: diag_H_mat_elem, diag_S_mat_elem
-  integer                        :: i,k
+  integer                        :: i,k,l
  ASSERT (N_st > 0)
  ASSERT (sze > 0)
  ASSERT (Nint > 0)
@ -87,9 +78,14 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d

  if (dressing_state > 0) then
    do k=1,N_st
+
      do i=1,sze
-        H_jj(i)  += u_in(i,k) * dressing_column_h(i,k)
+        H_jj(i) += u_in(i,k) * dressing_column_h(i,k)
      enddo
+
+      !l = dressed_column_idx(k)
+      !H_jj(l) += u_in(l,k) * dressing_column_h(l,k)
+
    enddo
  endif

@ -467,7 +463,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_


       y = h
-!       y = h_p
+       !y = h_p
       lwork = -1
       allocate(work(1))
       call dsygv(1,'V','U',shift2,y,size(y,1), &
--- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f
+++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f
@ -0,0 +1,541 @@
+
+! ---
+
+subroutine davidson_diag_nonsym_h(dets_in, u_in, dim_in, energies, sze, N_st, N_st_diag, Nint, dressing_state, converged)
+
+  BEGIN_DOC
+  ! Non-symmetric Davidson diagonalization. Builds the diagonal H_jj (with the
+  ! dressing contribution if requested) and calls davidson_diag_nonsym_hjj.
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates. N_st_diag : number of columns of u_in and energies
+  !
+  ! dressing_state : the dressing is added to H_jj only if dressing_state > 0
+  ! Initial guess vectors are not necessarily orthonormal
+  END_DOC
+
+  use bitmasks
+
+  implicit none
+
+  integer,           intent(in)    :: dim_in, sze, N_st, N_st_diag, Nint
+  integer,           intent(in)    :: dressing_state
+  integer(bit_kind), intent(in)    :: dets_in(Nint,2,sze)
+  logical,           intent(out)   :: converged
+  double precision,  intent(out)   :: energies(N_st_diag)
+  double precision,  intent(inout) :: u_in(dim_in,N_st_diag)
+
+  integer                          :: i, k, l
+  double precision                 :: f
+  double precision, allocatable    :: H_jj(:)
+
+  double precision, external       :: diag_H_mat_elem
+
+  ASSERT (N_st > 0)
+  ASSERT (sze  > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  PROVIDE mo_two_e_integrals_in_map
+
+  allocate(H_jj(sze))
+  ! Diagonal elements from diag_H_mat_elem; element 1 is evaluated before the OpenMP loop
+  H_jj(1) = diag_H_mat_elem(dets_in(1,1,1), Nint)
+ !$OMP PARALLEL DEFAULT(NONE)           &
+ !$OMP SHARED(sze, H_jj, dets_in, Nint) &
+ !$OMP PRIVATE(i)
+ !$OMP DO SCHEDULE(static)
+  do i = 2, sze
+    H_jj(i) = diag_H_mat_elem(dets_in(1,1,i), Nint)
+  enddo
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+  if(dressing_state > 0) then
+    do k = 1, N_st
+      do l = 1, N_st
+        f = overlap_states_inv(k,l)
+
+        ! H_jj holds sze elements: loop over sze, not over the leading
+        ! dimension dim_in of u_in (which may be larger), to stay in bounds
+        do i = 1, sze
+          H_jj(i) += f * dressing_delta(i,k) * u_in(i,l)
+        enddo
+
+      enddo
+    enddo
+  endif
+
+  call davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag, Nint, dressing_state, converged)
+
+  deallocate(H_jj)
+
+end subroutine davidson_diag_nonsym_h
+
+! ---
+
+subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze, N_st, N_st_diag_in, Nint, dressing_state, converged)
+
+  BEGIN_DOC
+  !
+  ! non-sym Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements used in the denominator of the Davidson step
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  !
+  ! N_st_diag_in : Number of states in which H is diagonalized. Must satisfy 3*N_st_diag_in <= sze
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  !
+  END_DOC
+
+  include 'constants.include.F'
+
+  use bitmasks
+  use mmap_module
+
+  implicit none
+
+  integer,           intent(in)    :: dim_in, sze, N_st, N_st_diag_in, Nint
+  integer,           intent(in)    :: dressing_state
+  integer(bit_kind), intent(in)    :: dets_in(Nint,2,sze)
+  double precision,  intent(in)    :: H_jj(sze)
+  double precision,  intent(out)   :: energies(N_st_diag_in)
+  logical,           intent(inout) :: converged
+  double precision,  intent(inout) :: u_in(dim_in,N_st_diag_in)
+
+  logical                          :: disk_based
+  character*(16384)                :: write_buffer
+  integer                          :: i, j, k, l, m
+  integer                          :: iter, N_st_diag, itertot, shift, shift2, itermax, istate
+  integer                          :: nproc_target
+  integer                          :: order(N_st_diag_in)
+  integer                          :: maxab
+  double precision                 :: rss
+  double precision                 :: cmax
+  double precision                 :: to_print(2,N_st)
+  double precision                 :: r1, r2
+  double precision                 :: f
+  double precision, allocatable    :: y(:,:), h(:,:), lambda(:)
+  double precision, allocatable    :: s_tmp(:,:), u_tmp(:,:)
+  double precision, allocatable    :: residual_norm(:)
+  double precision, allocatable    :: U(:,:), overlap(:,:)
+  double precision, pointer        :: W(:,:)
+
+  double precision, external       :: u_dot_u
+
+
+  N_st_diag = N_st_diag_in
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, y, h, lambda
+  if(N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_full to ', N_st_diag*3
+    stop -1
+  endif
+
+  itermax = max(2, min(davidson_sze_max, sze/N_st_diag)) + 1
+  itertot = 0
+
+  if(state_following) then
+    allocate(overlap(N_st_diag*itermax, N_st_diag*itermax))
+  else
+    allocate(overlap(1,1))  ! avoid 'if' for deallocate
+  endif
+  overlap = 0.d0
+
+  PROVIDE nuclear_repulsion expected_s2 psi_bilinear_matrix_order psi_bilinear_matrix_order_reverse threshold_davidson_pt2 threshold_davidson_from_pt2
+  PROVIDE threshold_nonsym_davidson
+
+  call write_time(6)
+  write(6,'(A)') ''
+  write(6,'(A)') 'Davidson Diagonalization'
+  write(6,'(A)') '------------------------'
+  write(6,'(A)') ''
+
+  ! Find max number of cores to fit in memory
+  ! -----------------------------------------
+
+  nproc_target = nproc
+  maxab = max(N_det_alpha_unique, N_det_beta_unique) + 1
+
+  m=1
+  disk_based = .False.
+  call resident_memory(rss)
+  do
+    r1 = 8.d0 *                                   &! bytes
+         ( dble(sze)*(N_st_diag*itermax)          &! U
+         + 1.0d0*dble(sze*m)*(N_st_diag*itermax)  &! W
+         + 3.0d0*(N_st_diag*itermax)**2           &! h,y,s_tmp
+         + 1.d0*(N_st_diag*itermax)               &! lambda
+         + 1.d0*(N_st_diag)                       &! residual_norm
+                                                   ! In H_u_0_nstates_zmq
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on collector
+         + 2.d0*(N_st_diag*N_det)                 &! u_t, v_t, on slave
+         + 0.5d0*maxab                            &! idx0 in H_u_0_nstates_openmp_work_*
+         + nproc_target *                         &! In OMP section
+           ( 1.d0*(N_int*maxab)                   &! buffer
+           + 3.5d0*(maxab) )                      &! singles_a, singles_b, doubles, idx
+         ) / 1024.d0**3
+
+    if(nproc_target == 0) then
+      call check_mem(r1, irp_here)
+      nproc_target = 1
+      exit
+    endif
+
+    if(r1+rss < qp_max_mem) then
+      exit
+    endif
+
+    if(itermax > 4) then
+      itermax = itermax - 1
+    else if(m==1 .and. disk_based_davidson) then
+      m = 0
+      disk_based = .True.
+      itermax = 6
+    else
+      nproc_target = nproc_target - 1
+    endif
+
+  enddo
+
+  nthreads_davidson = nproc_target
+  TOUCH nthreads_davidson
+
+  call write_int(6, N_st, 'Number of states')
+  call write_int(6, N_st_diag, 'Number of states in diagonalization')
+  call write_int(6, sze, 'Number of determinants')
+  call write_int(6, nproc_target, 'Number of threads for diagonalization')
+  call write_double(6, r1, 'Memory(Gb)')
+  if(disk_based) then
+    print *, 'Using swap space to reduce RAM'
+  endif
+
+  ! Print the header of the iteration table
+
+  write(6,'(A)') ''
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6, '(A)') write_buffer(1:6+41*N_st)
+  write_buffer = 'Iter'
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//'       Energy        Residual '
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+  write_buffer = '====='
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') write_buffer(1:6+41*N_st)
+
+
+  if(disk_based) then
+    ! Create a memory-mapped file for W (ptr_s and fd_s are declared but not used here)
+    type(c_ptr) :: ptr_w, ptr_s
+    integer :: fd_s, fd_w
+    call mmap(trim(ezfio_work_dir)//'davidson_w', (/int(sze,8),int(N_st_diag*itermax,8)/),&
+        8, fd_w, .False., ptr_w)
+    call c_f_pointer(ptr_w, w, (/sze,N_st_diag*itermax/))
+  else
+    allocate(W(sze,N_st_diag*itermax))
+  endif
+
+  allocate(                                                          &
+      ! Large
+      U(sze,N_st_diag*itermax),                                      &
+      ! Small
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      residual_norm(N_st_diag),                                      &
+      lambda(N_st_diag*itermax),                                     &
+      u_tmp(N_st,N_st_diag))
+
+  h = 0.d0
+  U = 0.d0
+  y = 0.d0
+  s_tmp = 0.d0
+
+
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+
+  ! Davidson iterations
+  ! ===================
+
+  converged = .False.
+  ! Extra guess vectors (k > N_st): vector k-N_st scaled element-wise by Gaussian-like random numbers
+  do k = N_st+1, N_st_diag
+    do i = 1, sze
+        call random_number(r1)
+        call random_number(r2)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        u_in(i,k) = r1*dcos(r2) * u_in(i,k-N_st)
+    enddo
+    u_in(k,k) = u_in(k,k) + 10.d0
+  enddo
+  do k = 1, N_st_diag
+    call normalize(u_in(1,k), sze)
+  enddo
+
+  do k = 1, N_st_diag
+    do i = 1, sze
+      U(i,k) = u_in(i,k)
+    enddo
+  enddo
+
+
+  do while (.not.converged)
+    itertot = itertot + 1
+    if(itertot == 8) then  ! hard cap: leave (unconverged) after 7 macro-iterations
+      exit
+    endif
+
+    do iter = 1, itermax-1
+
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+!      if( (iter > 1) .or. (itertot == 1) ) then
+
+        ! Orthonormalize the first shift2 columns of U with ortho_qr (called twice)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+        call ortho_qr(U, size(U, 1), sze, shift2)
+
+        ! Compute |W_k> = \sum_i |i><i|H|u_k>
+        ! -----------------------------------
+
+        if( (sze > 100000) .and. distributed_davidson ) then
+          call H_u_0_nstates_zmq   (W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+        else
+          call H_u_0_nstates_openmp(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
+        endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif
+
+      if(dressing_state > 0) then
+
+        call dgemm( 'T', 'N', N_st, N_st_diag, sze, 1.d0                   &
+                  , psi_coef, size(psi_coef, 1), U(1, shift+1), size(U, 1) &
+                  , 0.d0, u_tmp, size(u_tmp, 1))
+
+        do istate = 1, N_st_diag
+          do k = 1, N_st
+            do l = 1, N_st
+             f = overlap_states_inv(k,l)
+              do i = 1, sze
+                W(i,shift+istate) += f * dressing_delta(i,k) * u_tmp(l,istate)
+              enddo
+            enddo
+          enddo
+        enddo
+
+      endif
+
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      call dgemm( 'T', 'N', shift2, shift2, sze, 1.d0 &
+                , U, size(U, 1), W, size(W, 1)        &
+                , 0.d0, h, size(h, 1))
+
+      ! Diagonalize h
+      ! ---------------
+      call diag_nonsym_right(shift2, h(1,1), size(h, 1), y(1,1), size(y, 1), lambda(1), size(lambda, 1))
+
+
+      if (state_following) then
+
+        overlap = -1.d0
+        do k = 1, shift2
+          do i = 1, shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k = 1, N_st
+          cmax = -1.d0
+          do i = 1, N_st
+            if(overlap(i,k) > cmax) then
+              cmax = overlap(i,k)
+              order(k) = i
+            endif
+          enddo
+          do i = 1, N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k = 1, N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k = 1, N_st
+          overlap(k,1) = lambda(k)
+        enddo
+
+      endif
+
+      ! Express eigenvectors of h in the determinant basis
+      ! --------------------------------------------------
+
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , U, size(U, 1), y, size(y, 1)           &
+                , 0.d0, U(1,shift2+1), size(U, 1))
+
+      do k = 1, N_st_diag
+        call normalize(U(1,shift2+k), sze)
+      enddo
+
+      call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+                , W, size(W, 1), y, size(y, 1)           &
+                , 0.d0, W(1,shift2+1), size(W,1))
+
+      ! Compute residual vector and davidson step (denominator H_jj - lambda bounded below by 1.d-2)
+      ! -----------------------------------------
+
+      !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,k)
+      do k = 1, N_st_diag
+        do i = 1, sze
+          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k)) / max(H_jj(i)-lambda(k), 1.d-2)
+        enddo
+
+        if(k <= N_st) then
+          residual_norm(k) = u_dot_u(U(1,shift2+k), sze)
+          to_print(1,k) = lambda(k) + nuclear_repulsion
+          to_print(2,k) = residual_norm(k)
+        endif
+      enddo
+      !$OMP END PARALLEL DO
+
+      if((itertot>1).and.(iter == 1)) then
+        !don't print
+        continue
+      else
+        write(*, '(1X, I3, 1X, 100(1X, F16.10, 1X, E11.3))') iter-1, to_print(1:2,1:N_st)
+      endif
+
+      ! Check convergence (only the first N_st residuals are tested)
+      if(iter > 1) then
+        if(threshold_davidson_from_pt2) then
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_davidson_pt2
+        else
+          converged = dabs(maxval(residual_norm(1:N_st))) < threshold_nonsym_davidson
+        endif
+      endif
+
+      do k = 1, N_st
+        if(residual_norm(k) > 1.d8) then
+          print *, 'Davidson failed'
+          stop -1
+        endif
+      enddo
+      if(converged) then
+        exit
+      endif
+
+      logical, external :: qp_stop
+      if(qp_stop()) then
+        converged = .True.
+        exit
+      endif
+
+
+    enddo
+
+    ! Re-contract U and update W
+    ! --------------------------------
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , W, size(W, 1), y, size(y, 1)           & 
+              , 0.d0, u_in, size(u_in, 1))
+    do k = 1, N_st_diag
+      do i = 1, sze
+        W(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    call dgemm( 'N', 'N', sze, N_st_diag, shift2, 1.d0 &
+              , U, size(U, 1), y, size(y, 1), 0.d0     &
+              , u_in, size(u_in, 1))
+
+    do k = 1, N_st_diag
+      do i = 1, sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+  enddo
+
+
+  call nullify_small_elements(sze, N_st_diag, U, size(U, 1), threshold_davidson_pt2)
+  do k = 1, N_st_diag
+    do i = 1, sze
+      u_in(i,k) = U(i,k)
+    enddo
+  enddo
+
+  do k = 1, N_st_diag
+    energies(k) = lambda(k)
+  enddo
+  write_buffer = '======'
+  do i = 1, N_st
+    write_buffer = trim(write_buffer)//' ================ ==========='
+  enddo
+  write(6,'(A)') trim(write_buffer)
+  write(6,'(A)') ''
+  call write_time(6)
+
+  if(disk_based) then
+    ! Remove temp files
+    integer, external :: getUnitAndOpen
+    call munmap( (/int(sze,8),int(N_st_diag*itermax,8)/), 8, fd_w, ptr_w )
+    fd_w = getUnitAndOpen(trim(ezfio_work_dir)//'davidson_w','r')
+    close(fd_w,status='delete')
+  else
+    deallocate(W)
+  endif
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      U, overlap,                                                    &
+      h, y, s_tmp,                                                   &
+      lambda,                                                        &
+      u_tmp                                                          &
+      )
+  FREE nthreads_davidson
+
+end subroutine davidson_diag_nonsym_hjj
+
+! ---
+
+
+
+
+
+
+
--- a/src/davidson/overlap_states.irp.f
+++ b/src/davidson/overlap_states.irp.f
@ -0,0 +1,40 @@
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, overlap_states,     (N_states,N_states) ]
+&BEGIN_PROVIDER [ double precision, overlap_states_inv, (N_states,N_states) ]
+
+  BEGIN_DOC
+  ! Overlap matrix between the CI states, and its inverse:
+  !
+  ! overlap_states(k,l) = sum_i psi_coef(i,k) * psi_coef(i,l),  i = 1, N_det
+  !
+  END_DOC
+
+  implicit none
+  integer          :: idet
+  double precision :: norm2_acc
+
+  if(N_states /= 1) then
+
+    ! General case: S = C.T x C through BLAS, followed by get_inverse
+    call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0                &
+              , psi_coef, size(psi_coef, 1), psi_coef, size(psi_coef, 1) &
+              , 0.d0, overlap_states, size(overlap_states, 1)            )
+    call get_inverse(overlap_states, N_states, N_states, overlap_states_inv, N_states)
+
+  else
+
+    ! Single state: the overlap is the squared norm, inverted directly
+    norm2_acc = 0.d0
+    do idet = 1, N_det
+      norm2_acc = norm2_acc + psi_coef(idet,1) * psi_coef(idet,1)
+    enddo
+    overlap_states_inv(1,1) = 1.d0 / norm2_acc
+    overlap_states    (1,1) = norm2_acc
+  endif
+
+END_PROVIDER
+
+! ---
+
--- a/src/davidson_dressed/nonsym_diagonalize_ci.irp.f
+++ b/src/davidson_dressed/nonsym_diagonalize_ci.irp.f
@ -0,0 +1,188 @@
+
+! ---
+
+BEGIN_PROVIDER [ double precision, CI_energy_nonsym_dressed, (N_states_diag) ]
+
+  BEGIN_DOC
+  ! Total energies: CI_electronic_energy_nonsym_dressed + nuclear_repulsion
+  END_DOC
+
+  implicit none
+  integer       :: istate, n_filled, n_printed
+  character*(8) :: state_tag
+
+  n_filled  = min(N_det, N_states_diag)
+  n_printed = min(N_det, N_states)
+
+  call write_time(6)
+  CI_energy_nonsym_dressed(1:n_filled) = CI_electronic_energy_nonsym_dressed(1:n_filled) + nuclear_repulsion
+
+  do istate = 1, n_printed   ! only the first N_states energies are reported
+    write(state_tag, '(I4)') istate
+    call write_double(6, CI_energy_nonsym_dressed(istate), 'Energy of state '//trim(state_tag))
+  enddo
+END_PROVIDER
+
+! ---
+
+ BEGIN_PROVIDER [ double precision, CI_electronic_energy_nonsym_dressed, (N_states_diag) ]
+&BEGIN_PROVIDER [ double precision, CI_eigenvectors_nonsym_dressed, (N_det,N_states_diag) ]
+
+  BEGIN_DOC
+  ! Eigenvalues and eigenvectors of the dressed, non-symmetric CI matrix,
+  ! computed with the solver selected by diag_algorithm ("Davidson" or "Lapack").
+  !
+  ! The first min(N_states, N_det) guess vectors are taken from psi_coef.
+  ! The remaining guess vectors are set to zero.
+  END_DOC
+
+  implicit none
+  logical                       :: converged
+  integer                       :: i, j
+  double precision, allocatable :: eigenvectors(:,:), eigenvalues(:)
+
+  PROVIDE threshold_nonsym_davidson nthreads_davidson
+
+  ! Guess values for the "N_states" states of the CI_eigenvectors_nonsym_dressed
+  do j = 1, min(N_states, N_det)
+    do i = 1, N_det
+      CI_eigenvectors_nonsym_dressed(i,j) = psi_coef(i,j)
+    enddo
+  enddo
+
+  do j = min(N_states, N_det)+1, N_states_diag
+    do i = 1, N_det
+      CI_eigenvectors_nonsym_dressed(i,j) = 0.d0
+    enddo
+  enddo
+
+  ! ---
+
+  if(diag_algorithm == "Davidson") then
+
+    ! The diagonalization space cannot be smaller than the number of target
+    ! states (davidson_diag_nonsym_hjj asserts N_st_diag >= N_st as well).
+    ASSERT (N_states_diag >= N_states)
+
+    ! The guess vectors were already copied from psi_coef above, so they are
+    ! not copied again here. The literal 1 passed below is dressing_state:
+    ! any value > 0 makes the solver add the dressing.
+
+    converged = .False.
+    call davidson_diag_nonsym_h( psi_det, CI_eigenvectors_nonsym_dressed &
+                               , size(CI_eigenvectors_nonsym_dressed, 1) &
+                               , CI_electronic_energy_nonsym_dressed     &
+                               , N_det, min(N_det, N_states), min(N_det, N_states_diag), N_int, 1, converged )
+
+  else if(diag_algorithm == "Lapack") then
+
+    allocate(eigenvectors(size(H_matrix_nonsym_dressed, 1),N_det))
+    allocate(eigenvalues(N_det))
+
+    call diag_nonsym_right( N_det, H_matrix_nonsym_dressed, size(H_matrix_nonsym_dressed, 1)       &
+                          , eigenvectors, size(eigenvectors, 1), eigenvalues, size(eigenvalues, 1) )
+
+    CI_electronic_energy_nonsym_dressed(:) = 0.d0
+
+    ! Select the "N_states_diag" states of lowest energy
+    do j = 1, min(N_det, N_states_diag)
+      do i = 1, N_det
+        CI_eigenvectors_nonsym_dressed(i,j) = eigenvectors(i,j)
+      enddo
+      CI_electronic_energy_nonsym_dressed(j) = eigenvalues(j)
+    enddo
+
+    deallocate(eigenvectors, eigenvalues)
+
+    ! --- ---
+
+  endif
+
+  ! ---
+
+END_PROVIDER
+
+! ---
+
+subroutine diagonalize_CI_nonsym_dressed()
+
+  BEGIN_DOC
+  !  Overwrite psi_coef with the first N_states columns of
+  !  CI_eigenvectors_nonsym_dressed, then SOFT_TOUCH psi_coef
+  END_DOC
+
+  implicit none
+  integer :: istate
+
+  PROVIDE dressing_delta
+
+  ! Column-wise copy of the N_det coefficients of each state
+  do istate = 1, N_states
+    psi_coef(1:N_det,istate) = CI_eigenvectors_nonsym_dressed(1:N_det,istate)
+  enddo
+
+  ! IRPF90 directive issued because psi_coef was modified by hand
+  SOFT_TOUCH psi_coef
+
+end subroutine diagonalize_CI_nonsym_dressed
+
+! ---
+
+BEGIN_PROVIDER [ double precision, H_matrix_nonsym_dressed, (N_det,N_det) ]
+
+  BEGIN_DOC
+  ! Dressed H with Delta_ij, i.e. the non-symmetric dressed Hamiltonian
+  ! in the determinant basis:
+  !
+  ! H_dressed(i,j) = h_matrix_all_dets(i,j)
+  !                + sum_{k,l} overlap_states_inv(k,l) * dressing_delta(i,k) * psi_coef(j,l)
+  !
+  ! where k and l run over the N_states states.
+  END_DOC
+
+  implicit none
+  integer          :: idet, jdet, kstate, lstate
+  double precision :: s_inv_kl
+
+  ! Start from the undressed Hamiltonian
+  H_matrix_nonsym_dressed(1:N_det,1:N_det) = h_matrix_all_dets(1:N_det,1:N_det)
+
+  ! Commented-out single-state alternatives (based on dressed_column_idx),
+  ! kept as they were:
+  !
+  !    !symmetric formula
+  !    l = dressed_column_idx(1)
+  !    f = 1.0d0/psi_coef(l,1)
+  !    do i=1,N_det
+  !      h_matrix_nonsym_dressed(i,l) +=  dressing_column_h(i,1) *f
+  !      h_matrix_nonsym_dressed(l,i) +=  dressing_column_h(i,1) *f
+  !    enddo
+  !
+  !    l = dressed_column_idx(1)
+  !    f = 1.0d0 / psi_coef(l,1)
+  !    do j = 1, N_det
+  !      H_matrix_nonsym_dressed(j,l) += f * dressing_delta(j,1)
+  !    enddo
+
+  ! Add the contribution of every (k,l) pair of states. With a single state
+  ! the double loop reduces to k = l = 1, so no special case is needed.
+  do kstate = 1, N_states
+    do lstate = 1, N_states
+
+      s_inv_kl = overlap_states_inv(kstate,lstate)
+
+      ! The row index is the innermost loop (first array index varies fastest)
+      do jdet = 1, N_det
+        do idet = 1, N_det
+          H_matrix_nonsym_dressed(idet,jdet) = H_matrix_nonsym_dressed(idet,jdet) &
+              + s_inv_kl * dressing_delta(idet,kstate) * psi_coef(jdet,lstate)
+        enddo
+      enddo
+
+    enddo
+  enddo
+
+END_PROVIDER
+
+! ---
+
--- a/src/davidson_keywords/EZFIO.cfg
+++ b/src/davidson_keywords/EZFIO.cfg
@ -0,0 +1,54 @@
+[threshold_davidson]
+type: Threshold
+doc: Thresholds of Davidson's algorithm if threshold_davidson_from_pt2 is false.
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[threshold_nonsym_davidson]
+type: Threshold
+doc: Thresholds of non-symmetric Davidson's algorithm
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[davidson_sze_max]
+type: Strictly_positive_int
+doc: Number of micro-iterations before re-contracting
+default: 15
+interface: ezfio,provider,ocaml
+
+[state_following]
+type: logical
+doc: If |true|, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[disk_based_davidson]
+type: logical
+doc: If |true|, a memory-mapped file may be used to store the W and S2 vectors if not enough RAM is available
+default: True
+interface: ezfio,provider,ocaml
+
+[n_states_diag]
+type: States_number
+doc: Controls the number of states to consider during the Davidson diagonalization. The number of states is n_states * n_states_diag
+default: 4
+interface: ezfio,ocaml
+
+[n_det_max_full]
+type: Det_number_max
+doc: Maximum number of determinants where |H| is fully diagonalized
+interface: ezfio,provider,ocaml
+default: 1000
+
+[threshold_davidson_from_pt2]
+type: logical
+doc: Thresholds of Davidson's algorithm is set to E(rPT2)*threshold_davidson_from_pt2
+interface: ezfio,provider,ocaml
+default: false
+
+[distributed_davidson]
+type: logical
+doc: If |true|, use the distributed algorithm
+default: True
+interface: ezfio,provider,ocaml
+
--- a/src/davidson_keywords/NEED
+++ b/src/davidson_keywords/NEED
@ -0,0 +1 @@
+ezfio_files
--- a/src/davidson_keywords/README.rst
+++ b/src/davidson_keywords/README.rst
@ -0,0 +1,5 @@
+=================
+davidson_keywords
+=================
+
+Keywords used for Davidson algorithms.
--- a/src/davidson_keywords/input.irp.f
+++ b/src/davidson_keywords/input.irp.f
@ -1,3 +1,6 @@
+
+! ---
+
 BEGIN_PROVIDER [ integer, n_states_diag  ]
  implicit none
  BEGIN_DOC
@ -8,11 +11,11 @@ BEGIN_PROVIDER [ integer, n_states_diag  ]
  PROVIDE ezfio_filename
  if (mpi_master) then

-    call ezfio_has_davidson_n_states_diag(has)
+    call ezfio_has_davidson_keywords_n_states_diag(has)
    if (has) then
-      call ezfio_get_davidson_n_states_diag(n_states_diag)
+      call ezfio_get_davidson_keywords_n_states_diag(n_states_diag)
    else
-      print *, 'davidson/n_states_diag not found in EZFIO file'
+      print *, 'davidson_keywords/n_states_diag not found in EZFIO file'
      stop 1
    endif
    n_states_diag = max(2,N_states * N_states_diag)
@ -32,3 +35,4 @@ BEGIN_PROVIDER [ integer, n_states_diag  ]

 END_PROVIDER

+! ---
--- a/src/davidson_keywords/usef.irp.f
+++ b/src/davidson_keywords/usef.irp.f
@ -0,0 +1,33 @@
+use bitmasks
+use f77_zmq
+
+
+! ---
+
+BEGIN_PROVIDER [ integer, nthreads_davidson ]
+ implicit none
+ BEGIN_DOC
+ ! Number of threads for Davidson.
+ !
+ ! Defaults to nproc. When the environment variable QP_NTHREADS_DAVIDSON
+ ! holds a strictly positive integer, that value is used instead; any other
+ ! non-empty content is reported and ignored, keeping the default.
+ END_DOC
+ character*(32) :: env
+ integer        :: ios, n_env
+
+ nthreads_davidson = nproc
+ call getenv('QP_NTHREADS_DAVIDSON',env)
+ if (trim(env) /= '') then
+   ! iostat keeps a malformed value from aborting the run inside the read
+   n_env = 0
+   read(env,*,iostat=ios) n_env
+   if (ios /= 0) n_env = 0
+   if (n_env > 0) then
+     nthreads_davidson = n_env
+     call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
+   else
+     print *, 'Warning: invalid QP_NTHREADS_DAVIDSON ignored: ', trim(env)
+   endif
+ endif
+END_PROVIDER
+
+! ---
+
+BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ]
+ implicit none
+ BEGIN_DOC
+ ! Threshold of Davidson's algorithm, using PT2 as a guide. Set here to the value of threshold_davidson.
+ END_DOC
+ threshold_davidson_pt2 = threshold_davidson
+
+END_PROVIDER
+
+! ---
+
--- a/src/davidson_undressed/null_dressing_vector.irp.f
+++ b/src/davidson_undressed/null_dressing_vector.irp.f
@ -1,10 +1,12 @@
 BEGIN_PROVIDER [ double precision, dressing_column_h, (N_det,N_states) ]
 &BEGIN_PROVIDER [ double precision, dressing_column_s, (N_det,N_states) ]
+&BEGIN_PROVIDER [ double precision, dressing_delta   , (N_det,N_states) ]
 implicit none
 BEGIN_DOC
 ! Null dressing vectors: dressing_column_h, dressing_column_s and dressing_delta are all set to zero
 END_DOC
 dressing_column_h(:,:) = 0.d0
 dressing_column_s(:,:) = 0.d0
+ dressing_delta   (:,:) = 0.d0
 END_PROVIDER

--- a/src/determinants/spindeterminants.ezfio_config
+++ b/src/determinants/spindeterminants.ezfio_config
@ -9,8 +9,11 @@ spindeterminants
  psi_det_beta    integer*8 (spindeterminants_n_int*spindeterminants_bit_kind/8,spindeterminants_n_det_beta)
  psi_coef_matrix_rows    integer (spindeterminants_n_det)
  psi_coef_matrix_columns integer (spindeterminants_n_det)
-  psi_coef_matrix_values  double precision (spindeterminants_n_det,spindeterminants_n_states)
+  psi_coef_matrix_values      double precision (spindeterminants_n_det,spindeterminants_n_states)
+  psi_left_coef_matrix_values double precision (spindeterminants_n_det,spindeterminants_n_states)
  n_svd_coefs     integer
+  n_svd_alpha     integer
+  n_svd_beta      integer
  psi_svd_alpha   double precision (spindeterminants_n_det_alpha,spindeterminants_n_svd_coefs,spindeterminants_n_states)
  psi_svd_beta    double precision (spindeterminants_n_det_beta,spindeterminants_n_svd_coefs,spindeterminants_n_states)
  psi_svd_coefs   double precision (spindeterminants_n_svd_coefs,spindeterminants_n_states)
--- a/src/non_h_ints_mu/new_grad_tc.irp.f
+++ b/src/non_h_ints_mu/new_grad_tc.irp.f
@ -305,16 +305,8 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,

  if(read_tc_integ) then

-    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="read")
-      do i = 1, ao_num
-        do j = 1, ao_num
-          do k = 1, ao_num
-            do l = 1, ao_num
-              read(11) tc_grad_and_lapl_ao(l,k,j,i)
-            enddo
-          enddo
-        enddo
-      enddo
+    open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="read")
+    read(11) tc_grad_and_lapl_ao
    close(11)

  else
@ -374,18 +366,12 @@ BEGIN_PROVIDER [double precision, tc_grad_and_lapl_ao, (ao_num, ao_num, ao_num,

  endif

-  if(write_tc_integ) then
-    open(unit=11, form="unformatted", file='tc_grad_and_lapl_ao', action="write")
-      do i = 1, ao_num
-        do j = 1, ao_num
-          do k = 1, ao_num
-            do l = 1, ao_num
-              write(11) tc_grad_and_lapl_ao(l,k,j,i)
-            enddo
-          enddo
-        enddo
-      enddo
+  if(write_tc_integ.and.mpi_master) then
+    open(unit=11, form="unformatted", file=trim(ezfio_filename)//'/work/tc_grad_and_lapl_ao', action="write")
+    call ezfio_set_work_empty(.False.)
+    write(11) tc_grad_and_lapl_ao
    close(11)
+    call ezfio_set_tc_keywords_io_tc_integ('Read')
  endif

  call wall_time(time1)
--- a/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
+++ b/src/tc_bi_ortho/save_lr_bi_ortho_states.irp.f
@ -1,4 +1,4 @@
-program tc_bi_ortho
+program print_tc_energy
  implicit none
  BEGIN_DOC
 ! TODO : Put the documentation of the program here
@ -10,6 +10,6 @@ program tc_bi_ortho
  read_wf = .True.
  touch read_wf
  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-  call routine_save_left_right_bi_ortho
-! call test
+ call write_tc_energy
 end
+
--- a/src/tc_bi_ortho/psi_left_qmc.irp.f
+++ b/src/tc_bi_ortho/psi_left_qmc.irp.f
@ -17,6 +17,8 @@ BEGIN_PROVIDER  [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det,
  implicit none
  integer :: k, l

+  !print *, ' providing psi_bitcleft_bilinear_matrix_values'
+
  if(N_det .eq. 1) then

    do l = 1, N_states
@ -38,6 +40,8 @@ BEGIN_PROVIDER  [ double precision, psi_bitcleft_bilinear_matrix_values, (N_det,

  endif

+  !print *, ' psi_bitcleft_bilinear_matrix_values OK'
+
 END_PROVIDER

 ! ---
--- a/src/tc_bi_ortho/psi_r_l_prov.irp.f
+++ b/src/tc_bi_ortho/psi_r_l_prov.irp.f
@ -136,7 +136,7 @@ BEGIN_PROVIDER [ double precision, psi_r_coef_bi_ortho, (psi_det_size,N_states)
 END_PROVIDER


-subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psilcoef,psircoef)
+subroutine save_tc_wavefunction_general(ndet, nstates, psidet, sze, dim_psicoef, psilcoef, psircoef)
  implicit none
  BEGIN_DOC
  !  Save the wave function into the |EZFIO| file
@ -192,37 +192,78 @@ subroutine save_tc_wavefunction_general(ndet,nstates,psidet,sze,dim_psicoef,psil
  endif
 end

-subroutine save_tc_bi_ortho_wavefunction
- implicit none
- if(save_sorted_tc_wf)then
-  call save_tc_wavefunction_general(N_det,N_states,psi_det_sorted_tc,size(psi_det_sorted_tc, 3),size(psi_l_coef_sorted_bi_ortho, 1),psi_l_coef_sorted_bi_ortho,psi_r_coef_sorted_bi_ortho)
- else
-  call save_tc_wavefunction_general(N_det,N_states,psi_det,size(psi_det, 3), size(psi_l_coef_bi_ortho, 1),psi_l_coef_bi_ortho,psi_r_coef_bi_ortho)
- endif
- call routine_save_right_bi_ortho
+! ---
+
+subroutine save_tc_bi_ortho_wavefunction()
+
+  BEGIN_DOC
+  ! Saves the left and right coefficients through save_tc_wavefunction_general, then
+  ! saves the right coefficients again through the matching routine_save_right_* helper.
+  ! The flag save_sorted_tc_wf selects between the *_sorted_* arrays and the plain ones.
+  END_DOC
+
+  implicit none
+
+  if(.not. save_sorted_tc_wf) then
+
+    ! determinants and coefficients in their current (unsorted) arrays
+    call save_tc_wavefunction_general( N_det, N_states,                                 &
+                                       psi_det, size(psi_det, 3),                       &
+                                       size(psi_l_coef_bi_ortho, 1),                    &
+                                       psi_l_coef_bi_ortho, psi_r_coef_bi_ortho )
+    call routine_save_right_bi_ortho()
+
+  else
+
+    ! same calls, fed with the *_sorted_* arrays
+    call save_tc_wavefunction_general( N_det, N_states,                                 &
+                                       psi_det_sorted_tc, size(psi_det_sorted_tc, 3),   &
+                                       size(psi_l_coef_sorted_bi_ortho, 1),             &
+                                       psi_l_coef_sorted_bi_ortho, psi_r_coef_sorted_bi_ortho )
+    call routine_save_right_sorted_bi_ortho()
+
+  endif
+
 end

-subroutine routine_save_right_bi_ortho
- implicit none
- double precision, allocatable :: coef_tmp(:,:)
- integer :: i
- allocate(coef_tmp(N_det, N_states))
- do i = 1, N_det
-  coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states)
- enddo
- call save_wavefunction_general_unormalized(N_det,N_states,psi_det_sorted_tc,size(coef_tmp,1),coef_tmp(1,1))
-end                     
+! ---
+
+subroutine routine_save_right_sorted_bi_ortho()
+
+  implicit none
+  integer                       :: i
+  double precision, allocatable :: coef_tmp(:,:)
+
+  allocate(coef_tmp(N_det, N_states))
+  do i = 1, N_det
+    coef_tmp(i,1:N_states) = psi_r_coef_sorted_bi_ortho(i,1:N_states)
+  enddo
+  call save_wavefunction_general_unormalized(N_det, N_states, psi_det_sorted_tc, size(coef_tmp, 1), coef_tmp(1,1))
+  deallocate(coef_tmp)

-subroutine routine_save_left_right_bi_ortho
- implicit none
- double precision, allocatable :: coef_tmp(:,:)
- integer :: i,n_states_tmp
- n_states_tmp = 2
- allocate(coef_tmp(N_det, n_states_tmp))
- do i = 1, N_det
-  coef_tmp(i,1) = psi_r_coef_bi_ortho(i,1)
-  coef_tmp(i,2) = psi_l_coef_bi_ortho(i,1)
- enddo
- call save_wavefunction_general_unormalized(N_det,n_states_tmp,psi_det,size(coef_tmp,1),coef_tmp(1,1))
 end

+subroutine routine_save_left_right_sorted_bi_ortho()
+
+  BEGIN_DOC
+  ! Packs the first state of the right coefficients (column 1) and of the left
+  ! coefficients (column 2) into a two-column buffer and passes it, together with
+  ! psi_det, to save_wavefunction_general_unormalized as a two-state wave function.
+  !
+  ! NOTE(review): despite "sorted" in the name, the body reads psi_det and
+  ! psi_r/l_coef_bi_ortho, not the *_sorted_* arrays -- confirm this is intended.
+  END_DOC
+
+  implicit none
+  integer                       :: n_states_tmp
+  double precision, allocatable :: lr_coef(:,:)
+
+  n_states_tmp = 2
+  allocate(lr_coef(N_det, n_states_tmp))
+
+  ! column 1 <- right coefficients, column 2 <- left coefficients (first state only)
+  lr_coef(1:N_det,1) = psi_r_coef_bi_ortho(1:N_det,1)
+  lr_coef(1:N_det,2) = psi_l_coef_bi_ortho(1:N_det,1)
+
+  call save_wavefunction_general_unormalized(N_det, n_states_tmp, psi_det, size(lr_coef, 1), lr_coef(1,1))
+  deallocate(lr_coef)
+end
+
+! ---
+
+subroutine routine_save_right_bi_ortho()
+
+  BEGIN_DOC
+  ! Copies the first N_det rows of psi_r_coef_bi_ortho (all N_states columns) into a
+  ! compact (N_det,N_states) buffer and passes it, together with psi_det, to
+  ! save_wavefunction_general_unormalized.
+  END_DOC
+
+  implicit none
+  integer                       :: istate
+  double precision, allocatable :: r_coef(:,:)
+
+  allocate(r_coef(N_det, N_states))
+  ! one contiguous column copy per state
+  do istate = 1, N_states
+    r_coef(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate)
+  enddo
+  call save_wavefunction_general_unormalized(N_det, N_states, psi_det, size(r_coef, 1), r_coef(1,1))
+  deallocate(r_coef)
+
+end
+
+! ---
+
+
--- a/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
+++ b/src/tc_bi_ortho/save_bitcpsileft_for_qmcchem.irp.pouet
@ -1,5 +1,18 @@
 program save_bitcpsileft_for_qmcchem

+  implicit none
+
+  read_wf = .True.
+  TOUCH read_wf
+
+  call main()
+
+end
+
+
+subroutine main()
+
+  implicit none
  integer          :: iunit
  logical          :: exists
  double precision :: e_ref
@ -46,7 +59,7 @@ program save_bitcpsileft_for_qmcchem

  close(iunit)

-end
+end subroutine main

 ! --

@ -61,12 +74,18 @@ subroutine write_lr_spindeterminants()

  PROVIDE psi_bitcleft_bilinear_matrix_values

+  print *, ' saving left determinants'
+  print *, ' assuming save_for_qmc called before to save right determinants'
+  print *, ' N_det    = ', N_det
+  print *, ' N_states = ', N_states
+
  allocate(buffer(N_det,N_states))
  do l = 1, N_states
    do k = 1, N_det
      buffer(k,l) = psi_bitcleft_bilinear_matrix_values(k,l)
    enddo
  enddo
+
  call ezfio_set_spindeterminants_psi_left_coef_matrix_values(buffer)
  deallocate(buffer)

--- a/src/tc_bi_ortho/tc_bi_ortho.irp.f
+++ b/src/tc_bi_ortho/tc_bi_ortho.irp.f
@ -1,16 +1,25 @@
 program tc_bi_ortho
-  implicit none
+
  BEGIN_DOC
-! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together with the energy. Saves the left-right wave functions at the end. 
+  !
+  ! TODO : Reads psi_det in the EZFIO folder and prints out the left- and right-eigenvectors together 
+  !        with the energy. Saves the left-right wave functions at the end. 
+  !
  END_DOC
+
  my_grid_becke = .True.
  my_n_pt_r_grid = 30
  my_n_pt_a_grid = 50
  read_wf = .True.
  touch read_wf
-  touch  my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
-  call routine_diag
-  call save_tc_bi_ortho_wavefunction
+  touch my_grid_becke my_n_pt_r_grid my_n_pt_a_grid
+
+  print*, ' nb of states = ', N_states
+  print*, ' nb of det    = ', N_det
+
+  call routine_diag()
+  call write_tc_energy()
+  call save_tc_bi_ortho_wavefunction()
 end

 subroutine test
@ -27,26 +36,53 @@ subroutine test

 end

-subroutine routine_diag
- implicit none
-! provide eigval_right_tc_bi_orth
-!  provide overlap_bi_ortho
-!  provide htilde_matrix_elmt_bi_ortho
- integer ::i,j
- print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1)
- print*,'e_tc_left_right         = ',e_tc_left_right
- print*,'e_tilde_bi_orth_00      = ',e_tilde_bi_orth_00
- print*,'e_pt2_tc_bi_orth        = ',e_pt2_tc_bi_orth
- print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single
- print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double
- print*,'***'
- print*,'e_corr_bi_orth          = ',e_corr_bi_orth
- print*,'e_corr_bi_orth_proj     = ',e_corr_bi_orth_proj
- print*,'e_corr_single_bi_orth   = ',e_corr_single_bi_orth
- print*,'e_corr_double_bi_orth   = ',e_corr_double_bi_orth
- print*,'Left/right eigenvectors'
- do i = 1,N_det
-  write(*,'(I5,X,(100(F12.7,X)))')i,leigvec_tc_bi_orth(i,1),reigvec_tc_bi_orth(i,1),leigvec_tc_bi_orth(i,1)*reigvec_tc_bi_orth(i,1)
- enddo
+subroutine routine_diag()
+
+  BEGIN_DOC
+  ! Prints the results of the TC diagonalization.
+  !
+  ! Several states : the list of eigval_right_tc_bi_orth, then the differences with
+  !                  respect to state 1, both as computed and divided by the eV factor.
+  ! Single state   : the energy-related quantities and the left/right eigenvector
+  !                  components of state 1 with their product, one line per determinant.
+  END_DOC
+
+  implicit none
+  ! Divisor applied to an energy difference for the "(eV)" column.
+  ! NOTE(review): CODATA gives about 0.0367493 Hartree per eV -- confirm this value.
+  double precision, parameter :: hartree_per_ev_used = 0.0367502d0
+  integer                     :: idet, istate
+  double precision            :: exc_au
+
+  if(N_states .ne. 1) then
+
+    print*,'eigval_right_tc_bi_orth : '
+    do istate = 1, N_states
+      print*, istate, eigval_right_tc_bi_orth(istate)
+    enddo
+
+    print*,''
+    print*,'******************************************************'
+    print*,'TC Excitation energies (au)                     (eV)'
+    do istate = 2, N_states
+      exc_au = eigval_right_tc_bi_orth(istate) - eigval_right_tc_bi_orth(1)
+      print*, istate, exc_au, exc_au/hartree_per_ev_used
+    enddo
+    print*,''
+
+  else
+
+    print*,'eigval_right_tc_bi_orth = ',eigval_right_tc_bi_orth(1)
+    print*,'e_tc_left_right         = ',e_tc_left_right
+    print*,'e_tilde_bi_orth_00      = ',e_tilde_bi_orth_00
+    print*,'e_pt2_tc_bi_orth        = ',e_pt2_tc_bi_orth
+    print*,'e_pt2_tc_bi_orth_single = ',e_pt2_tc_bi_orth_single
+    print*,'e_pt2_tc_bi_orth_double = ',e_pt2_tc_bi_orth_double
+    print*,'***'
+    print*,'e_corr_bi_orth          = ',e_corr_bi_orth
+    print*,'e_corr_bi_orth_proj     = ',e_corr_bi_orth_proj
+    print*,'e_corr_single_bi_orth   = ',e_corr_single_bi_orth
+    print*,'e_corr_double_bi_orth   = ',e_corr_double_bi_orth
+    print*,'Left/right eigenvectors'
+    ! columns: index, left component, right component, left*right
+    do idet = 1, N_det
+      write(*,'(I5,X,(100(F12.7,X)))') idet, leigvec_tc_bi_orth(idet,1), reigvec_tc_bi_orth(idet,1), &
+                                       leigvec_tc_bi_orth(idet,1)*reigvec_tc_bi_orth(idet,1)
+    enddo
+
+  endif
+
 end

+
+
--- a/src/tc_bi_ortho/tc_h_eigvectors.irp.f
+++ b/src/tc_bi_ortho/tc_h_eigvectors.irp.f
@ -43,7 +43,7 @@ end
  END_DOC

  implicit none
-  integer                       :: i, idx_dress, j, istate
+  integer                       :: i, idx_dress, j, istate, k
  logical                       :: converged, dagger
  integer                       :: n_real_tc_bi_orth_eigval_right,igood_r,igood_l
  double precision, allocatable :: reigvec_tc_bi_orth_tmp(:,:),leigvec_tc_bi_orth_tmp(:,:),eigval_right_tmp(:)
@ -52,116 +52,123 @@ end
  integer                        :: i_good_state,i_other_state, i_state
  integer, allocatable           :: index_good_state_array(:)
  logical, allocatable           :: good_state_array(:)
-  double precision, allocatable :: coef_hf_r(:),coef_hf_l(:)
+  double precision, allocatable  :: coef_hf_r(:),coef_hf_l(:)
+  double precision, allocatable  :: Stmp(:,:)
  integer, allocatable :: iorder(:)

  PROVIDE N_det N_int

-   if(n_det.le.N_det_max_full)then
+  if(n_det .le. N_det_max_full) then
+
    allocate(reigvec_tc_bi_orth_tmp(N_det,N_det),leigvec_tc_bi_orth_tmp(N_det,N_det),eigval_right_tmp(N_det),expect_e(N_det))
    allocate (H_prime(N_det,N_det),s2_values_tmp(N_det))
+
    H_prime(1:N_det,1:N_det) = htilde_matrix_elmt_bi_ortho(1:N_det,1:N_det)
-    if(s2_eig)then
-       H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det)
-       do j=1,N_det
-         H_prime(j,j) = H_prime(j,j) - alpha*expected_s2
-       enddo
+    if(s2_eig) then
+      H_prime(1:N_det,1:N_det) += alpha * S2_matrix_all_dets(1:N_det,1:N_det)
+      do j=1,N_det
+        H_prime(j,j) = H_prime(j,j) - alpha*expected_s2
+      enddo
    endif
-    call non_hrmt_real_diag(N_det,H_prime,& 
-         leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,& 
-         n_real_tc_bi_orth_eigval_right,eigval_right_tmp)
+
+    call non_hrmt_real_diag(N_det, H_prime, leigvec_tc_bi_orth_tmp, reigvec_tc_bi_orth_tmp, n_real_tc_bi_orth_eigval_right, eigval_right_tmp)
 !    do i = 1, N_det
 !     call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp(1,i),reigvec_tc_bi_orth_tmp(1,i),1,N_det,expect_e(i), s2_values_tmp(i))
 !    enddo
    call get_H_tc_s2_l0_r0(leigvec_tc_bi_orth_tmp,reigvec_tc_bi_orth_tmp,N_det,N_det,expect_e, s2_values_tmp)
+
    allocate(index_good_state_array(N_det),good_state_array(N_det))
    i_state = 0
    good_state_array = .False.
-    if(s2_eig)then
-     if (only_expected_s2) then
-       do j=1,N_det
+
+    if(s2_eig) then
+
+      if(only_expected_s2) then
+        do j = 1, N_det
         ! Select at least n_states states with S^2 values closed to "expected_s2"
 !         print*,'s2_values_tmp(j) = ',s2_values_tmp(j),eigval_right_tmp(j),expect_e(j)
-         if(dabs(s2_values_tmp(j)-expected_s2).le.0.5d0)then
-           i_state +=1
-           index_good_state_array(i_state) = j
-           good_state_array(j) = .True.
-         endif
-         if(i_state.eq.N_states) then
-           exit
-         endif
-       enddo
-     else
-       do j=1,N_det
-         index_good_state_array(j) = j
-         good_state_array(j) = .True.
-       enddo
-     endif
-     if(i_state .ne.0)then
-       ! Fill the first "i_state" states that have a correct S^2 value
-       do j = 1, i_state
-         do i=1,N_det
-           reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j))
-           leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j))
-         enddo
-         eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j))
-         eigval_left_tc_bi_orth(j)  = expect_e(index_good_state_array(j))
-         s2_eigvec_tc_bi_orth(j)    = s2_values_tmp(index_good_state_array(j))
-       enddo
-       i_other_state = 0
-       do j = 1, N_det
-         if(good_state_array(j))cycle
-         i_other_state +=1
-         if(i_state+i_other_state.gt.n_states)then
-           exit
-         endif
-         do i=1,N_det
-           reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j)
-           leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j)
-         enddo
-         eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j)
-         eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j)
-         s2_eigvec_tc_bi_orth(i_state+i_other_state)    = s2_values_tmp(i_state+i_other_state)
-       enddo
-     else ! istate == 0
-         print*,''
-         print*,'!!!!!!!!   WARNING  !!!!!!!!!'
-         print*,'  Within the ',N_det,'determinants selected'
-         print*,'  and the ',N_states_diag,'states requested'
-         print*,'  We did not find only states with S^2 values close to ',expected_s2
-         print*,'  We will then set the first N_states eigenvectors of the H matrix'
-         print*,'  as the CI_eigenvectors'
-         print*,'  You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space'
-         print*,''
-         do j=1,min(N_states_diag,N_det)
-           do i=1,N_det
-             leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j)
-             reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j)
-           enddo
-           eigval_right_tc_bi_orth(j) = eigval_right_tmp(j)
-           eigval_left_tc_bi_orth (j) = eigval_right_tmp(j)
-           s2_eigvec_tc_bi_orth(j)    = s2_values_tmp(j)
-         enddo
-     endif ! istate .ne. 0
+          if(dabs(s2_values_tmp(j) - expected_s2).le.0.5d0)then
+            i_state +=1
+            index_good_state_array(i_state) = j
+            good_state_array(j) = .True.
+          endif
+          if(i_state.eq.N_states) then
+            exit
+          endif
+        enddo
+      else
+        do j = 1, N_det
+          index_good_state_array(j) = j
+          good_state_array(j) = .True.
+        enddo
+      endif
+
+      if(i_state .ne. 0) then
+        ! Fill the first "i_state" states that have a correct S^2 value
+        do j = 1, i_state
+          do i = 1, N_det
+            reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,index_good_state_array(j))
+            leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,index_good_state_array(j))
+          enddo
+          eigval_right_tc_bi_orth(j) = expect_e(index_good_state_array(j))
+          eigval_left_tc_bi_orth(j)  = expect_e(index_good_state_array(j))
+          s2_eigvec_tc_bi_orth(j)    = s2_values_tmp(index_good_state_array(j))
+        enddo
+        i_other_state = 0
+        do j = 1, N_det
+          if(good_state_array(j))cycle
+          i_other_state +=1
+          if(i_state+i_other_state.gt.n_states)then
+            exit
+          endif
+          do i = 1, N_det
+            reigvec_tc_bi_orth(i,i_state+i_other_state) = reigvec_tc_bi_orth_tmp(i,j)
+            leigvec_tc_bi_orth(i,i_state+i_other_state) = leigvec_tc_bi_orth_tmp(i,j)
+          enddo
+          eigval_right_tc_bi_orth(i_state+i_other_state) = eigval_right_tmp(j)
+          eigval_left_tc_bi_orth (i_state+i_other_state) = eigval_right_tmp(j)
+          s2_eigvec_tc_bi_orth(i_state+i_other_state)    = s2_values_tmp(i_state+i_other_state)
+        enddo
+      else ! istate == 0
+        print*,''
+        print*,'!!!!!!!!   WARNING  !!!!!!!!!'
+        print*,'  Within the ',N_det,'determinants selected'
+        print*,'  and the ',N_states_diag,'states requested'
+        print*,'  We did not find only states with S^2 values close to ',expected_s2
+        print*,'  We will then set the first N_states eigenvectors of the H matrix'
+        print*,'  as the CI_eigenvectors'
+        print*,'  You should consider more states and maybe ask for s2_eig to be .True. or just enlarge the CI space'
+        print*,''
+        do j = 1, min(N_states_diag, N_det)
+          do i = 1, N_det
+            leigvec_tc_bi_orth(i,j) = leigvec_tc_bi_orth_tmp(i,j)
+            reigvec_tc_bi_orth(i,j) = reigvec_tc_bi_orth_tmp(i,j)
+          enddo
+          eigval_right_tc_bi_orth(j) = eigval_right_tmp(j)
+          eigval_left_tc_bi_orth (j) = eigval_right_tmp(j)
+          s2_eigvec_tc_bi_orth(j)    = s2_values_tmp(j)
+        enddo
+      endif ! istate .ne. 0

    else ! s2_eig
-       allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det))
-       do i = 1,N_det
+
+      allocate(coef_hf_r(N_det),coef_hf_l(N_det),iorder(N_det))
+      do i = 1,N_det
        iorder(i) = i
        coef_hf_r(i) = -dabs(reigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
-       enddo
-       call dsort(coef_hf_r,iorder,N_det)
-       igood_r = iorder(1)
-       print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1)
-       do i = 1,N_det
+      enddo
+      call dsort(coef_hf_r,iorder,N_det)
+      igood_r = iorder(1)
+      print*,'igood_r, coef_hf_r = ',igood_r,coef_hf_r(1)
+      do i = 1,N_det
        iorder(i) = i
        coef_hf_l(i) = -dabs(leigvec_tc_bi_orth_tmp(index_HF_psi_det,i))
-       enddo
-       call dsort(coef_hf_l,iorder,N_det)
-       igood_l = iorder(1)
-       print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1)
+      enddo
+      call dsort(coef_hf_l,iorder,N_det)
+      igood_l = iorder(1)
+      print*,'igood_l, coef_hf_l = ',igood_l,coef_hf_l(1)
       
-       if(igood_r.ne.igood_l.and.igood_r.ne.1)then
+      if(igood_r.ne.igood_l .and. igood_r.ne.1) then
        print *,''
        print *,'Warning, the left and right eigenvectors are "not the same" '
        print *,'Warning, the ground state is not dominated by HF...'
@ -169,22 +176,22 @@ end
        print *,'coef of HF in RIGHT eigenvector = ',reigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_r)
        print *,'State with largest LEFT  coefficient of HF ',igood_l
        print *,'coef of HF in LEFT  eigenvector = ',leigvec_tc_bi_orth_tmp(index_HF_psi_det,igood_l)
-       endif
-       if(state_following_tc)then
+      endif
+
+      if(state_following_tc) then
        print *,'Following the states with the largest coef on HF'
        print *,'igood_r,igood_l',igood_r,igood_l
-        i= igood_r
+        i = igood_r
        eigval_right_tc_bi_orth(1) = eigval_right_tmp(i)
        do j = 1, N_det
          reigvec_tc_bi_orth(j,1) = reigvec_tc_bi_orth_tmp(j,i)
-!          print*,reigvec_tc_bi_orth(j,1)
        enddo
-        i= igood_l
+        i = igood_l
        eigval_left_tc_bi_orth(1)  = eigval_right_tmp(i)
        do j = 1, N_det
          leigvec_tc_bi_orth(j,1) = leigvec_tc_bi_orth_tmp(j,i)
        enddo
-       else 
+      else 
        do i = 1, N_states
          eigval_right_tc_bi_orth(i) = eigval_right_tmp(i)
          eigval_left_tc_bi_orth(i)  = eigval_right_tmp(i)
@ -193,9 +200,12 @@ end
            leigvec_tc_bi_orth(j,i) = leigvec_tc_bi_orth_tmp(j,i)
          enddo
        enddo
-       endif
+      endif
+
    endif
-   else 
+
+  else ! n_det > N_det_max_full
+
    double precision, allocatable :: H_jj(:),vec_tmp(:,:)
    external                         htc_bi_ortho_calc_tdav
    external                         htcdag_bi_ortho_calc_tdav
@ -203,36 +213,39 @@ end
    external                         H_tc_dagger_u_0_opt
    external                         H_tc_s2_dagger_u_0_opt
    external                         H_tc_s2_u_0_opt
+
    allocate(H_jj(N_det),vec_tmp(N_det,n_states_diag))
+
    do i = 1, N_det
      call htilde_mu_mat_bi_ortho_tot(psi_det(1,1,i), psi_det(1,1,i), N_int, H_jj(i))
    enddo
- !!!! Preparing the left-eigenvector
+
    print*,'---------------------------------'
    print*,'---------------------------------'
    print*,'Computing the left-eigenvector '
    print*,'---------------------------------'
    print*,'---------------------------------'
+    !!!! Preparing the left-eigenvector
    vec_tmp = 0.d0
    do istate = 1, N_states
-     vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate)
+      vec_tmp(1:N_det,istate) = psi_l_coef_bi_ortho(1:N_det,istate)
    enddo
    do istate = N_states+1, n_states_diag
-     vec_tmp(istate,istate) = 1.d0
+      vec_tmp(istate,istate) = 1.d0
    enddo
-!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav)
-!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt)
+    !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, htcdag_bi_ortho_calc_tdav)
+    !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_dagger_u_0_opt)
    integer :: n_it_max,i_it
    n_it_max = 1
    converged = .False.
    i_it = 0
    do while (.not.converged)
-     call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt)
-     i_it += 1
-     if(i_it .gt. 5)exit
+      call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_left_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt)
+      i_it += 1
+      if(i_it .gt. 5) exit
    enddo
    do istate = 1, N_states
-     leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+      leigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
    enddo

    print*,'---------------------------------'
@ -240,78 +253,125 @@ end
    print*,'Computing the right-eigenvector '
    print*,'---------------------------------'
    print*,'---------------------------------'
- !!!! Preparing the right-eigenvector
+    !!!! Preparing the right-eigenvector
    vec_tmp = 0.d0
    do istate = 1, N_states
-     vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate)
+      vec_tmp(1:N_det,istate) = psi_r_coef_bi_ortho(1:N_det,istate)
    enddo
    do istate = N_states+1, n_states_diag
-     vec_tmp(istate,istate) = 1.d0
+      vec_tmp(istate,istate) = 1.d0
    enddo
-!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
-!    call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt)
+    !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, htc_bi_ortho_calc_tdav)
+    !call davidson_general_ext_rout_nonsym_b1space(vec_tmp, H_jj, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, converged, H_tc_u_0_opt)
    converged = .False.
    i_it = 0
-    do while (.not.converged)
-     call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_dagger_u_0_opt)
-     i_it += 1
-     if(i_it .gt. 5)exit
+    do while (.not. converged)
+      call davidson_hs2_nonsym_b1space(vec_tmp, H_jj, s2_eigvec_tc_bi_orth, eigval_right_tc_bi_orth, N_det, n_states, n_states_diag, n_it_max, converged, H_tc_s2_u_0_opt)
+      i_it += 1
+      if(i_it .gt. 5) exit
    enddo
    do istate = 1, N_states
-     reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
+      reigvec_tc_bi_orth(1:N_det,istate) = vec_tmp(1:N_det,istate)
    enddo

    deallocate(H_jj)
-   endif
-  call bi_normalize(leigvec_tc_bi_orth,reigvec_tc_bi_orth,size(reigvec_tc_bi_orth,1),N_det,N_states)
-   print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ',leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1)
-   norm_ground_left_right_bi_orth = 0.d0
-   do j = 1, N_det
-    norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,1) * reigvec_tc_bi_orth(j,1)
-   enddo
-   print*,'norm l/r = ',norm_ground_left_right_bi_orth
-   print*,'<S2>     = ',s2_eigvec_tc_bi_orth(1)
+  endif
+
+  call bi_normalize(leigvec_tc_bi_orth, reigvec_tc_bi_orth, size(reigvec_tc_bi_orth, 1), N_det, N_states)
+  ! check bi-orthogonality
+  allocate(Stmp(N_states,N_states))
+  call dgemm( 'T', 'N', N_states, N_states, N_det, 1.d0                                                              &
+        , leigvec_tc_bi_orth(1,1), size(leigvec_tc_bi_orth, 1), reigvec_tc_bi_orth(1,1), size(reigvec_tc_bi_orth, 1) &
+        , 0.d0, Stmp(1,1), size(Stmp, 1) )
+  print *, ' overlap matrix between states:'
+  do i = 1, N_states
+    write(*,'(1000(F16.10,X))') Stmp(i,:)
+  enddo
+  deallocate(Stmp)
+
+  print*,'leigvec_tc_bi_orth(1,1),reigvec_tc_bi_orth(1,1) = ', leigvec_tc_bi_orth(1,1), reigvec_tc_bi_orth(1,1)
+  do i = 1, N_states
+    norm_ground_left_right_bi_orth = 0.d0
+    do j = 1, N_det
+      norm_ground_left_right_bi_orth += leigvec_tc_bi_orth(j,i) * reigvec_tc_bi_orth(j,i)
+    enddo
+    print*,' state      ', i
+    print*,' norm l/r = ', norm_ground_left_right_bi_orth
+    print*,' <S2>     = ', s2_eigvec_tc_bi_orth(i)
+  enddo
+
+  double precision, allocatable :: buffer(:,:)
+  allocate(buffer(N_det,N_states))
+  do k = 1, N_states
+    do i = 1, N_det
+      psi_l_coef_bi_ortho(i,k) = leigvec_tc_bi_orth(i,k)
+      buffer(i,k) = leigvec_tc_bi_orth(i,k)
+    enddo
+  enddo
+  TOUCH psi_l_coef_bi_ortho
+  call ezfio_set_tc_bi_ortho_psi_l_coef_bi_ortho(buffer)
+  do k = 1, N_states
+    do i = 1, N_det
+      psi_r_coef_bi_ortho(i,k) = reigvec_tc_bi_orth(i,k)
+      buffer(i,k) = reigvec_tc_bi_orth(i,k)
+    enddo
+  enddo
+  TOUCH psi_r_coef_bi_ortho
+  call ezfio_set_tc_bi_ortho_psi_r_coef_bi_ortho(buffer)
+  deallocate(buffer)

 END_PROVIDER 



-subroutine bi_normalize(u_l,u_r,n,ld,nstates)
+subroutine bi_normalize(u_l, u_r, n, ld, nstates)
+
+  BEGIN_DOC
  !!!! Normalization of the scalar product of the left/right eigenvectors
+  END_DOC
+
+  implicit none
+  integer,          intent(in)    :: n, ld, nstates
  double precision, intent(inout) :: u_l(ld,nstates), u_r(ld,nstates)
-  integer, intent(in) :: n,ld,nstates
-  integer :: i
-  double precision  :: accu, tmp 
+  integer                         :: i, j
+  double precision                :: accu, tmp
+
  do i = 1, nstates
-   !!!! Normalization of right eigenvectors |Phi>
-   accu = 0.d0
-   do j = 1, n
-    accu += u_r(j,i) * u_r(j,i)
-   enddo
-   accu = 1.d0/dsqrt(accu)
-   print*,'accu_r = ',accu
-   do j = 1, n
-    u_r(j,i) *= accu 
-   enddo
-   tmp = u_r(1,i) / dabs(u_r(1,i))
-   do j = 1, n
-    u_r(j,i) *= tmp
-   enddo
-   !!!! Adaptation of the norm of the left eigenvector such that <chi|Phi> = 1
-   accu = 0.d0
-   do j = 1, n
-    accu += u_l(j,i) * u_r(j,i)
-!    print*,j, u_l(j,i) , u_r(j,i)
-   enddo
-   if(accu.gt.0.d0)then
+
+    !!!! Normalization of right eigenvectors |Phi>
+    accu = 0.d0
+    do j = 1, n
+      accu += u_r(j,i) * u_r(j,i)
+    enddo
    accu = 1.d0/dsqrt(accu)
-   else
-    accu = 1.d0/dsqrt(-accu)
-   endif
-   tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i))
-   do j = 1, n
-    u_l(j,i) *= accu * tmp
-    u_r(j,i) *= accu 
-   enddo
+    print*,'accu_r = ',accu
+    do j = 1, n
+      u_r(j,i) *= accu
+    enddo
+    tmp = u_r(1,i) / dabs(u_r(1,i))
+    do j = 1, n
+      u_r(j,i) *= tmp
+    enddo
+
+    !!!! Adaptation of the norm of the left eigenvector such that <chi|Phi> = 1
+    accu = 0.d0
+    do j = 1, n
+      accu += u_l(j,i) * u_r(j,i)
+      !print*,j, u_l(j,i) , u_r(j,i)
+    enddo
+    print*,'accu_lr = ', accu
+    if(accu.gt.0.d0)then
+      accu = 1.d0/dsqrt(accu)
+    else
+      accu = 1.d0/dsqrt(-accu)
+    endif
+    tmp = (u_l(1,i) * u_r(1,i) )/dabs(u_l(1,i) * u_r(1,i))
+    do j = 1, n
+      u_l(j,i) *= accu * tmp
+      u_r(j,i) *= accu
+    enddo
+
  enddo
+
 end
+
--- a/src/tc_bi_ortho/tc_hmat.irp.f
+++ b/src/tc_bi_ortho/tc_hmat.irp.f
@ -12,6 +12,11 @@
  double precision :: hmono,htwoe,hthree,htot

  PROVIDE N_int
+
+  i = 1
+  j = 1
+  call htilde_mu_mat_bi_ortho(psi_det(1,1,j), psi_det(1,1,i), N_int, hmono, htwoe, hthree, htot)
+
 !$OMP PARALLEL DO SCHEDULE(GUIDED) DEFAULT(NONE) PRIVATE(i,j,hmono, htwoe, hthree, htot) &
 !$OMP SHARED (N_det, psi_det, N_int,htilde_matrix_elmt_bi_ortho)
    do i = 1, N_det
--- a/src/tc_bi_ortho/tc_utils.irp.f
+++ b/src/tc_bi_ortho/tc_utils.irp.f
@ -0,0 +1,34 @@
+
+subroutine write_tc_energy()
+
+  BEGIN_DOC
+  ! For every state k, prints
+  !   O_TC = sum_i  psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k)
+  !   E_TC = sum_ij psi_l_coef_bi_ortho(i,k) * htot(i,j) * psi_r_coef_bi_ortho(j,k) / O_TC
+  ! where htot(i,j) is the total value returned by htilde_mu_mat_bi_ortho for the
+  ! determinants psi_det(1,1,i) and psi_det(1,1,j).
+  !
+  ! The matrix element does not depend on the state, so it is evaluated once per
+  ! pair of determinants and accumulated into one running sum per state. The order
+  ! of the additions for a given state is the same as in a per-state double loop.
+  END_DOC
+
+  implicit none
+  integer                       :: i, j, k
+  double precision              :: hmono, htwoe, hthree, htot
+  double precision, allocatable :: E_TC(:), O_TC(:)
+
+  allocate(E_TC(n_states), O_TC(n_states))
+  E_TC(:) = 0.d0
+  O_TC(:) = 0.d0
+
+  do i = 1, N_det
+
+    do j = 1, N_det
+      ! computed on the fly rather than read from htilde_matrix_elmt_bi_ortho
+      call htilde_mu_mat_bi_ortho(psi_det(1,1,i), psi_det(1,1,j), N_int, hmono, htwoe, hthree, htot)
+      do k = 1, n_states
+        E_TC(k) = E_TC(k) + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(j,k) * htot
+      enddo
+    enddo
+
+    ! left/right overlap of each state
+    do k = 1, n_states
+      O_TC(k) = O_TC(k) + psi_l_coef_bi_ortho(i,k) * psi_r_coef_bi_ortho(i,k)
+    enddo
+
+  enddo
+
+  do k = 1, n_states
+    print *, ' state :', k
+    print *, " E_TC = ", E_TC(k) / O_TC(k)
+    print *, " O_TC = ", O_TC(k)
+  enddo
+
+  deallocate(E_TC, O_TC)
+
+end
+