diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f
index d6b9cc79..374b003d 100644
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@@ -8,6 +8,7 @@ END_PROVIDER
  BEGIN_PROVIDER [ double precision, lambda_mrcc, (N_states, N_det_non_ref) ]
 &BEGIN_PROVIDER [ integer, lambda_mrcc_pt2, (0:psi_det_size) ]
 &BEGIN_PROVIDER [ integer, lambda_mrcc_kept, (0:psi_det_size) ]
+&BEGIN_PROVIDER [ double precision, lambda_pert, (N_states, N_det_non_ref) ]
   implicit none
   BEGIN_DOC
   ! cm/<psi_0|H|D_m> or perturbative 1/Delta_E(m)
@@ -15,7 +16,7 @@
   integer :: i,k
   double precision :: ihpsi_current(N_states)
   integer :: i_pert_count
-  double precision :: hii, lambda_pert
+  double precision :: hii, E2(N_states), E2var(N_states)
   integer :: N_lambda_mrcc_pt2, N_lambda_mrcc_pt3

   i_pert_count = 0
@@ -25,6 +26,8 @@
   lambda_mrcc_pt2(0) = 0
   lambda_mrcc_kept(0) = 0

+  E2 = 0.d0
+  E2var = 0.d0
   do i=1,N_det_non_ref
     call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
         size(psi_ref_coef,1), N_states,ihpsi_current)
@@ -33,24 +36,51 @@
       if (ihpsi_current(k) == 0.d0) then
         ihpsi_current(k) = 1.d-32
       endif
-!     lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
-      lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
-      lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
-      if (lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
-        ! Ignore lamdba
-        i_pert_count += 1
-        lambda_mrcc(k,i) = 0.d0
-        if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
-          N_lambda_mrcc_pt2 += 1
-          lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
-        endif
-      else
-        ! Keep lamdba
-        if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
-          N_lambda_mrcc_pt3 += 1
-          lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
-        endif
+      lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
+      lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
+      E2(k) += ihpsi_current(k)*ihpsi_current(k) / (psi_ref_energy_diagonalized(k)-hii)
+      E2var(k) += ihpsi_current(k) * psi_non_ref_coef(i,k)
+    enddo
+  enddo
+
+  do i=1,N_det_non_ref
+    call i_h_psi(psi_non_ref(1,1,i), psi_ref, psi_ref_coef, N_int, N_det_ref,&
+        size(psi_ref_coef,1), N_states,ihpsi_current)
+    call i_H_j(psi_non_ref(1,1,i),psi_non_ref(1,1,i),N_int,hii)
+    do k=1,N_states
+      if (ihpsi_current(k) == 0.d0) then
+        ihpsi_current(k) = 1.d-32
+      endif
+      lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k)
+      lambda_pert(k,i) = 1.d0 / (psi_ref_energy_diagonalized(k)-hii) * E2var(k)/E2(k)
+!     lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
+!     if ( lambda_pert / lambda_mrcc(k,i) < 0.5d0) then
+!       print *,  ' xxx ', dabs(lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k)
+!     if ( dabs( (lambda_pert - lambda_mrcc(k,i)) * ihpsi_current(k) ) > 1.d-3) then
+!       print *, 'lambda mrcc', lambda_mrcc(k,i)
+!       print *, 'lambda pert', lambda_pert
+!       print *, 'coef: ', psi_non_ref_coef(i,k)
+!       call debug_det(psi_non_ref(1,1,i), N_int)
+!       call i_H_j(psi_ref(1,1,1),psi_non_ref(1,1,i),N_int,hii)
+!       print *, hii
+!       call i_H_j(psi_ref(1,1,2),psi_non_ref(1,1,i),N_int,hii)
+!       print *, hii
+!       print *, '---'
+        ! Ignore lamdba
+!       i_pert_count += 1
+!       lambda_mrcc(k,i) = 0.d0
+!       lambda_mrcc(k,i) = lambda_pert * E2var(k)/E2(k)
+!       if (lambda_mrcc_pt2(N_lambda_mrcc_pt2) /= i) then
+!         N_lambda_mrcc_pt2 += 1
+!         lambda_mrcc_pt2(N_lambda_mrcc_pt2) = i
+!       endif
+!     else
+!       ! Keep lamdba
+!       if (lambda_mrcc_kept(N_lambda_mrcc_pt3) /= i) then
+!         N_lambda_mrcc_pt3 += 1
+!         lambda_mrcc_kept(N_lambda_mrcc_pt3) = i
+!       endif
+!
       endif
     enddo
   enddo
   lambda_mrcc_pt2(0) = N_lambda_mrcc_pt2
@@ -784,8 +814,8 @@ END_PROVIDER
       f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)

       ! Avoid numerical instabilities
-      f = min(f,2.d0)
-      f = max(f,-2.d0)
+!      f = min(f,2.d0)
+!      f = max(f,-2.d0)
     endif

     norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
@@ -845,11 +875,39 @@ END_PROVIDER


+
+
+double precision function f_fit(x)
+  implicit none
+  double precision :: x
+  f_fit = 0.d0
+  return
+  if (x < 0.d0) then
+    f_fit = 0.d0
+  else if (x < 1.d0) then
+    f_fit = 1.d0/0.367879441171442 * ( x**2 * exp(-x**2))
+  else
+    f_fit = 1.d0
+  endif
+end

 double precision function get_dij_index(II, i, s, Nint)
   integer, intent(in) :: II, i, s, Nint
   double precision, external :: get_dij
-  double precision :: HIi, phase
+  double precision :: HIi, phase, c, a, b, d
+
+  call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,i), Nint, HIi)
+  call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
+
+  a = lambda_pert(s,i)
+!  b = lambda_mrcc(s,i)
+!  c = f_fit(a/b)
+
+  d = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase* rho_mrcc(i,s)
+
+  c = f_fit(a*HIi/d)
+
+!  get_dij_index = HIi * a * c  + (1.d0 - c) * d
+  get_dij_index = d
+  return

   if(lambda_type == 0) then
     call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f
index c4147ebc..95c993f0 100644
--- a/plugins/Psiref_Utils/psi_ref_utils.irp.f
+++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f
@@ -98,8 +98,7 @@ END_PROVIDER
   enddo
   N_det_non_ref = i_non_ref
   if (N_det_non_ref < 1) then
-    print *,  'Error : All determinants are in the reference'
-    stop -1
+    print *,  'Warning : All determinants are in the reference'
   endif
 END_PROVIDER

diff --git a/src/Davidson/davidson_parallel.irp.f b/src/Davidson/davidson_parallel.irp.f
index 724aac08..04b0cc52 100644
--- a/src/Davidson/davidson_parallel.irp.f
+++ b/src/Davidson/davidson_parallel.irp.f
@@ -144,13 +144,13 @@ subroutine davidson_collect(N, idx, vt, st , v0t, s0t)
 end subroutine


-subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
+subroutine davidson_init(zmq_to_qp_run_socket,u,n0,n,n_st)
   use f77_zmq
   implicit none

   integer(ZMQ_PTR), intent(out) :: zmq_to_qp_run_socket
-  integer, intent(in)           :: n, n_st_8
-  double precision, intent(in)  :: ut(n_st_8,n)
+  integer, intent(in)           :: n0,n, n_st
+  double precision, intent(in)  :: u(n0,n_st)

   integer :: i,k

@@ -164,8 +164,8 @@ subroutine davidson_init(zmq_to_qp_run_socket,n,n_st_8,ut)
     enddo
   enddo
   do i=1,n
-    do k=1,N_states_diag
-      dav_ut(k,i) = ut(k,i)
+    do k=1,n_st
+      dav_ut(k,i) = u(i,k)
     enddo
   enddo

@@ -285,6 +285,7 @@ subroutine davidson_slave_work(zmq_to_qp_run_socket, zmq_socket_push, worker_id)
     call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
     call davidson_push_results(zmq_socket_push, blockb, blockb2, N, idx, vt, st, task_id)
   end do
+  deallocate(idx, vt, st)
 end subroutine


@@ -411,8 +412,8 @@ subroutine davidson_collector(zmq_to_qp_run_socket, zmq_socket_pull , v0, s0, LD
   allocate(v0t(N_states_diag, dav_size))
   allocate(s0t(N_states_diag, dav_size))

-  v0t = 00.d0
-  s0t = 00.d0
+  v0t = 0.d0
+  s0t = 0.d0

   more = 1

@@ -456,23 +457,12 @@ subroutine davidson_run(zmq_to_qp_run_socket , v0, s0, LDA)
   double precision   , intent(inout)  :: s0(LDA, N_states_diag)


-  PROVIDE nproc
-
-  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(i)
-  i = omp_get_thread_num()
-  if (i == 0 ) then
-    zmq_collector = new_zmq_to_qp_run_socket()
-    zmq_socket_pull = new_zmq_pull_socket()
-    call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
-    call end_zmq_to_qp_run_socket(zmq_collector)
-    call end_zmq_pull_socket(zmq_socket_pull)
-    call davidson_miniserver_end()
-  else if (i == 1 ) then
-    call davidson_miniserver_run ()
-  else
-    call davidson_slave_inproc(i)
-  endif
-  !$OMP END PARALLEL
+  zmq_collector = new_zmq_to_qp_run_socket()
+  zmq_socket_pull = new_zmq_pull_socket()
+  call davidson_collector(zmq_collector, zmq_socket_pull , v0, s0, LDA)
+  call end_zmq_to_qp_run_socket(zmq_collector)
+  call end_zmq_pull_socket(zmq_socket_pull)
+  call davidson_miniserver_end()

 end subroutine
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index 1901525b..4b36e030 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -122,6 +122,10 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
     stop -1
   endif

+  integer, external :: align_double
+  sze_8 = align_double(sze)
+  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
+
   PROVIDE nuclear_repulsion expected_s2

   call write_time(iunit)
@@ -134,6 +138,9 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   call write_int(iunit,N_st,'Number of states')
   call write_int(iunit,N_st_diag,'Number of states in diagonalization')
   call write_int(iunit,sze,'Number of determinants')
+  r1 = 8.d0*(3.d0*dble(sze_8*N_st_diag*itermax+5.d0*(N_st_diag*itermax)**2 &
+      + 4.d0*(N_st_diag*itermax))/(1024.d0**3))
+  call write_double(iunit, r1, 'Memory(Gb)')
   write(iunit,'(A)') ''
   write_buffer = '===== '
   do i=1,N_st
@@ -151,14 +158,14 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   enddo
   write(iunit,'(A)') trim(write_buffer)

-  integer, external :: align_double
-  sze_8 = align_double(sze)
-
-  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
+
   allocate(                                                          &
+      ! Large
       W(sze_8,N_st_diag*itermax),                                    &
       U(sze_8,N_st_diag*itermax),                                    &
       S(sze_8,N_st_diag*itermax),                                    &
+
+      ! Small
       h(N_st_diag*itermax,N_st_diag*itermax),                        &
       y(N_st_diag*itermax,N_st_diag*itermax),                        &
       s_(N_st_diag*itermax,N_st_diag*itermax),                       &
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index 026921d0..42e61b3a 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -57,8 +57,6 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
-  double precision :: ave_workload, workload, target_workload_inv

   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st

@@ -286,19 +284,16 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
   double precision, intent(in)   :: H_jj(n), S2_jj(n)
   integer(bit_kind),intent(in)   :: keys_tmp(Nint,2,n)
   double precision               :: hij,s2
-  double precision, allocatable  :: ut(:,:)
   integer                        :: i,j,k,l, jj,ii
   integer                        :: i0, j0, ithread

-  integer, allocatable           :: shortcut(:,:), sort_idx(:)
-  integer(bit_kind), allocatable :: sorted(:,:), version(:,:)
   integer(bit_kind)              :: sorted_i(Nint)

   integer :: sh, sh2, ni, exa, ext, org_i, org_j, endi, istate
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
+  integer :: blockb2, istep
   double precision :: ave_workload, workload, target_workload_inv

   integer(ZMQ_PTR) :: handler
@@ -311,61 +306,52 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
   ASSERT (n>0)
   PROVIDE ref_bitmask_energy

-  allocate (shortcut(0:n+1,2), sort_idx(n), sorted(Nint,n), version(Nint,n))
-  allocate(ut(N_st_8,n))
-
   v_0 = 0.d0
   s_0 = 0.d0

-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(i,istate)
-    enddo
-  enddo
-
-  call sort_dets_ab_v(keys_tmp, sorted, sort_idx, shortcut(0,1), version, n, Nint)
-  call sort_dets_ba_v(keys_tmp, sorted, sort_idx, shortcut(0,2), version, n, Nint)
-
-  blockb = shortcut(0,1)
-  call davidson_init(handler,n,N_st_8,ut)
+  call davidson_init(handler,u_0,size(u_0,1),n,N_st)
+
+  ave_workload = 0.d0
+  do sh=1,shortcut_(0,1)
+    ave_workload += shortcut_(0,1)
+    ave_workload += (shortcut_(sh+1,1) - shortcut_(sh,1))**2
+    do i=sh, shortcut_(0,2), shortcut_(0,1)
+      do j=i, min(i, shortcut_(0,2))
+        ave_workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
+      end do
+    end do
+  enddo
+  ave_workload = ave_workload/dble(shortcut_(0,1))
+  target_workload_inv = 0.01d0/ave_workload

-  PROVIDE nproc
-
-  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread)
+
+  !$OMP PARALLEL NUM_THREADS(nproc+2) PRIVATE(ithread,sh,i,j,        &
+      !$OMP workload,istep,blockb2)
   ithread = omp_get_thread_num()
   if (ithread == 0 ) then
-
-    call zmq_set_running(handler)
-    ave_workload = 0.d0
-    do sh=1,shortcut(0,1)
-      ave_workload += shortcut(0,1)
-      ave_workload += (shortcut(sh+1,1) - shortcut(sh,1))**2
-      do i=sh, shortcut(0,2), shortcut(0,1)
-        do j=i, min(i, shortcut(0,2))
-          ave_workload += (shortcut(j+1,2) - shortcut(j, 2))**2
-        end do
-      end do
-    enddo
-    ave_workload = ave_workload/dble(shortcut(0,1))
-    target_workload_inv = 0.01d0/ave_workload
-
-
-    do sh=1,shortcut(0,1),1
-      workload = shortcut(0,1)+dble(shortcut(sh+1,1) - shortcut(sh,1))**2
-      do i=sh, shortcut(0,2), shortcut(0,1)
-        do j=i, min(i, shortcut(0,2))
-          workload += (shortcut(j+1,2) - shortcut(j, 2))**2
+    do sh=1,shortcut_(0,1),1
+      workload = shortcut_(0,1)+dble(shortcut_(sh+1,1) - shortcut_(sh,1))**2
+      do i=sh, shortcut_(0,2), shortcut_(0,1)
+        do j=i, min(i, shortcut_(0,2))
+          workload += (shortcut_(j+1,2) - shortcut_(j, 2))**2
         end do
       end do
       istep = 1+ int(workload*target_workload_inv)
       do blockb2=0, istep-1
         call davidson_add_task(handler, sh, blockb2, istep)
       enddo
+      if (sh == shortcut_(0,1)/10 + 1) then
+        !$OMP BARRIER
+      endif
     enddo
+    call zmq_set_running(handler)
     call davidson_run(handler, v_0, s_0, size(v_0,1))
   else if (ithread == 1 ) then
+    !$OMP BARRIER
     call davidson_miniserver_run ()
   else
+    !$OMP BARRIER
     call davidson_slave_inproc(ithread)
   endif
   !$OMP END PARALLEL
@@ -378,8 +364,6 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_
       s_0(i,istate) = s_0(i,istate) + s2_jj(i)* u_0(i,istate)
     enddo
   enddo
-  deallocate(shortcut, sort_idx, sorted, version)
-  deallocate(ut)
 end


@@ -414,8 +398,6 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
   integer :: N_st_8

   integer, external :: align_double
-  integer :: blockb, blockb2, istep
-  double precision :: ave_workload, workload, target_workload_inv

   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
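
Note on the new Memory(Gb) printout in davidson_diag_hjj_sjj: the dominant contribution is the three large work arrays W, U and S allocated just after it, each sze_8 x (N_st_diag*itermax) double precision words. The following standalone sketch (not part of the patch; the sze_8, n_st_diag and itermax values are invented for illustration) reproduces only that dominant term of the estimate.

! Minimal standalone sketch (not part of the patch): rough size of the three
! large Davidson work arrays W, U and S, each sze_8 x (N_st_diag*itermax)
! double precision words.  The values below are purely illustrative.
program davidson_memory_estimate
  implicit none
  integer          :: sze_8, n_st_diag, itermax
  double precision :: r1
  sze_8     = 1000000    ! hypothetical aligned number of determinants
  n_st_diag = 4          ! hypothetical number of states in the diagonalization
  itermax   = 8          ! hypothetical maximum number of Davidson iterations
  ! 3 arrays x 8 bytes per double precision word, converted to GiB
  r1 = 3.d0 * 8.d0 * dble(sze_8) * dble(n_st_diag*itermax) / (1024.d0**3)
  print '(A,F8.2,A)', 'Davidson work arrays: ', r1, ' GiB'
end program davidson_memory_estimate

With these illustrative values the dominant term is about 0.72 GiB; the formula added in the patch also folds in the smaller (N_st_diag*itermax)**2 matrices (h, y, s_, ...) before calling write_double.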