diff --git a/.travis.yml b/.travis.yml
index 40c09bbc..57991ba3 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -25,7 +25,7 @@ python:
     - "2.6"
 
 script: 
-  - ./configure  --production ./config/gfortran.cfg
+  - ./configure  --production ./config/travis.cfg
   - source ./quantum_package.rc ; qp_module.py install Full_CI Full_CI_ZMQ Hartree_Fock CAS_SD_ZMQ mrcepa0 All_singles
   - source ./quantum_package.rc ; ninja
   - source ./quantum_package.rc ; cd ocaml ; make ; cd -
diff --git a/config/travis.cfg b/config/travis.cfg
new file mode 100644
index 00000000..024e330b
--- /dev/null
+++ b/config/travis.cfg
@@ -0,0 +1,62 @@
+# Common flags
+##############
+#
+# -ffree-line-length-none : Needed for IRPF90 which produces long lines
+# -lblas -llapack         : Link with libblas and liblapack libraries provided by the system
+# -I .                    : Include the curent directory (Mandatory)
+#
+# --ninja                 : Allow the utilisation of ninja. (Mandatory)
+# --align=32              : Align all provided arrays on a 32-byte boundary
+#
+#
+[COMMON]
+FC           : gfortran -ffree-line-length-none -I . -g
+LAPACK_LIB   : -llapack -lblas
+IRPF90       : irpf90
+IRPF90_FLAGS : --ninja --align=32
+
+# Global options
+################
+#
+# 1 : Activate
+# 0 : Deactivate
+#
+[OPTION]
+MODE    : OPT        ; [ OPT | PROFILE | DEBUG ] : Chooses the section below
+CACHE   : 1          ; Enable cache_compile.py
+OPENMP  : 1          ; Append OpenMP flags
+
+# Optimization flags
+####################
+#
+# -Ofast                  : Disregard strict standards compliance. Enables all -O3 optimizations.
+#                           It also enables optimizations that are not valid
+#                           for all standard-compliant programs.  It turns on
+#                           -ffast-math and the Fortran-specific
+#                           -fno-protect-parens and -fstack-arrays.
+[OPT]
+FCFLAGS : -Ofast -march=native
+
+# Profiling flags
+#################
+#
+[PROFILE]
+FC      : -p -g 
+FCFLAGS : -Ofast 
+
+# Debugging flags
+#################
+#
+# -fcheck=all  : Checks uninitialized variables,  array subscripts, etc...
+# -g           : Extra debugging information
+#
+[DEBUG]
+FCFLAGS : -fcheck=all -g
+
+# OpenMP flags
+#################
+#
+[OPENMP]
+FC           : -fopenmp
+IRPF90_FLAGS : --openmp 
+
diff --git a/plugins/CAS_SD_ZMQ/EZFIO.cfg b/plugins/CAS_SD_ZMQ/EZFIO.cfg
new file mode 100644
index 00000000..7425c8ba
--- /dev/null
+++ b/plugins/CAS_SD_ZMQ/EZFIO.cfg
@@ -0,0 +1,10 @@
+[energy]
+type: double precision
+doc: "Calculated CAS-SD energy"
+interface: ezfio
+
+[energy_pt2]
+type: double precision
+doc: "Calculated selected CAS-SD energy with PT2 correction"
+interface: ezfio
+
diff --git a/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES b/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES
new file mode 100644
index 00000000..ae599426
--- /dev/null
+++ b/plugins/CAS_SD_ZMQ/NEEDED_CHILDREN_MODULES
@@ -0,0 +1,2 @@
+Generators_CAS Perturbation Selectors_CASSD ZMQ 
+
diff --git a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
index 01e57649..6844ed90 100644
--- a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
+++ b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
@@ -99,9 +99,9 @@ program fci_zmq
     print *,  'N_states = ', N_states
     do k=1,N_states
       print *, 'State', k
-      print *,  'PT2      = ', pt2
-      print *,  'E        = ', E_CI_before
-      print *,  'E+PT2    = ', E_CI_before+pt2
+      print *,  'PT2      = ', pt2(k)
+      print *,  'E        = ', E_CI_before(k)
+      print *,  'E+PT2    = ', E_CI_before(k)+pt2(k)
       print *,  '-----'
     enddo
     call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before+pt2)
@@ -145,9 +145,9 @@ subroutine ZMQ_selection(N_in, pt2)
 
   step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num ))
   step = max(1,step)
-  do i= N_det_generators, 1, -step
-    i_generator_start = max(i-step+1,1)
-    i_generator_max = i
+  do i= 1, N_det_generators,step
+    i_generator_start = i
+    i_generator_max = min(i+step-1,N_det_generators)
     write(task,*) i_generator_start, i_generator_max, 1, N
     call add_task_to_taskserver(zmq_to_qp_run_socket,task)
   end do
@@ -164,7 +164,6 @@ subroutine ZMQ_selection(N_in, pt2)
   if (N_in > 0) then
     call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) !!! PAS DE ROBIN
     call copy_H_apply_buffer_to_wf()
-call remove_duplicates_in_psi_det
     if (s2_eig) then
       call make_s2_eigenfunction
     endif
diff --git a/plugins/CAS_SD_ZMQ/energy.irp.f b/plugins/CAS_SD_ZMQ/energy.irp.f
index 4999c176..db1e7d1a 100644
--- a/plugins/CAS_SD_ZMQ/energy.irp.f
+++ b/plugins/CAS_SD_ZMQ/energy.irp.f
@@ -3,9 +3,9 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
  BEGIN_DOC
  ! E0 in the denominator of the PT2
  END_DOC
- pt2_E0_denominator(:) = CI_electronic_energy(:)
-! pt2_E0_denominator(:) = HF_energy - nuclear_repulsion
-! pt2_E0_denominator(:) = barycentric_electronic_energy(:)
+ pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states)
+! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion
+! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
  call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator')
 END_PROVIDER
 
diff --git a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f
index 36550116..dfaee629 100644
--- a/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f
+++ b/plugins/CAS_SD_ZMQ/run_selection_slave.irp.f
@@ -4,7 +4,7 @@ subroutine run_selection_slave(thread,iproc,energy)
   use selection_types
   implicit none
 
-  double precision, intent(in)    :: energy(N_states_diag)
+  double precision, intent(in)    :: energy(N_states)
   integer,  intent(in)            :: thread, iproc
   integer                        :: rc, i
 
diff --git a/plugins/CAS_SD_ZMQ/selection.irp.f b/plugins/CAS_SD_ZMQ/selection.irp.f
index 39131520..f90ee488 100644
--- a/plugins/CAS_SD_ZMQ/selection.irp.f
+++ b/plugins/CAS_SD_ZMQ/selection.irp.f
@@ -202,11 +202,6 @@ subroutine fill_buffer_single(i_generator, sp, h1, bannedOrb, fock_diag_tmp, E0,
     if(vect(1, p1) == 0d0) cycle
     call apply_particle(mask, sp, p1, det, ok, N_int)
     
-logical, external :: is_in_wavefunction
-if (is_in_wavefunction(det,N_int)) then
-  cycle
-endif
-    
     Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
     max_e_pert = 0d0
     
diff --git a/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f b/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f
new file mode 100644
index 00000000..657ad63c
--- /dev/null
+++ b/plugins/CAS_SD_ZMQ/selection_cassd_slave.irp.f
@@ -0,0 +1,93 @@
+program selection_slave
+  implicit none
+  BEGIN_DOC
+! Helper program to compute the PT2 in distributed mode.
+  END_DOC
+
+  read_wf = .False.
+  SOFT_TOUCH read_wf
+  call provide_everything
+  call switch_qp_run_to_master
+  call run_wf
+end
+
+subroutine provide_everything
+  PROVIDE H_apply_buffer_allocated mo_bielec_integrals_in_map psi_det_generators psi_coef_generators psi_det_sorted_bit psi_selectors n_det_generators n_states generators_bitmask zmq_context
+   PROVIDE pt2_e0_denominator mo_tot_num N_int
+end
+
+subroutine run_wf
+  use f77_zmq
+  implicit none
+
+  integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR) :: zmq_to_qp_run_socket
+  double precision :: energy(N_states)
+  character*(64) :: states(1)
+  integer :: rc, i
+  
+  call provide_everything
+  
+  zmq_context = f77_zmq_ctx_new ()
+  states(1) = 'selection'
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  do
+
+    call wait_for_states(states,zmq_state,1)
+
+    if(trim(zmq_state) == 'Stopped') then
+
+      exit
+
+    else if (trim(zmq_state) == 'selection') then
+
+      ! Selection
+      ! ---------
+
+      print *,  'Selection'
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
+  
+      !$OMP PARALLEL PRIVATE(i)
+      i = omp_get_thread_num()
+      call selection_slave_tcp(i, energy)
+      !$OMP END PARALLEL
+      print *,  'Selection done'
+
+    endif
+
+  end do
+end
+
+subroutine update_energy(energy)
+  implicit none
+  double precision, intent(in) :: energy(N_states)
+  BEGIN_DOC
+! Update energy when it is received from ZMQ
+  END_DOC
+  integer :: j,k
+  do j=1,N_states
+    do k=1,N_det
+      CI_eigenvectors(k,j) = psi_coef(k,j)
+    enddo
+  enddo
+  call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int)
+  if (.True.) then
+    do k=1,N_states
+      ci_electronic_energy(k) = energy(k)
+    enddo
+    TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors
+  endif
+
+  call write_double(6,ci_energy,'Energy')
+end
+
+subroutine selection_slave_tcp(i,energy)
+  implicit none
+  double precision, intent(in) :: energy(N_states)
+  integer, intent(in)            :: i
+
+  call run_selection_slave(0,i,energy)
+end
+
diff --git a/plugins/FOBOCI/SC2_1h1p.irp.f b/plugins/FOBOCI/SC2_1h1p.irp.f
index b9378575..7733831c 100644
--- a/plugins/FOBOCI/SC2_1h1p.irp.f
+++ b/plugins/FOBOCI/SC2_1h1p.irp.f
@@ -210,7 +210,7 @@ subroutine dressing_1h1p_by_2h2p(dets_in,u_in,diag_H_elements,dim_in,sze,N_st,Ni
   integer, intent(in)            :: dim_in, sze, N_st, Nint
   integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
   double precision, intent(inout) :: u_in(dim_in,N_st)
-  double precision, intent(out)  :: diag_H_elements(dim_in)
+  double precision, intent(out)  :: diag_H_elements(0:dim_in)
   double precision, intent(in)   :: convergence
   
   integer :: i,j,k,l
diff --git a/plugins/Full_CI_ZMQ/EZFIO.cfg b/plugins/Full_CI_ZMQ/EZFIO.cfg
new file mode 100644
index 00000000..26f1a8e5
--- /dev/null
+++ b/plugins/Full_CI_ZMQ/EZFIO.cfg
@@ -0,0 +1,11 @@
+[energy]
+type: double precision
+doc: Calculated Selected FCI energy
+interface: ezfio
+
+[energy_pt2]
+type: double precision
+doc: Calculated FCI energy + PT2
+interface: ezfio
+
+
diff --git a/plugins/Full_CI_ZMQ/energy.irp.f b/plugins/Full_CI_ZMQ/energy.irp.f
index 4999c176..db1e7d1a 100644
--- a/plugins/Full_CI_ZMQ/energy.irp.f
+++ b/plugins/Full_CI_ZMQ/energy.irp.f
@@ -3,9 +3,9 @@ BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
  BEGIN_DOC
  ! E0 in the denominator of the PT2
  END_DOC
- pt2_E0_denominator(:) = CI_electronic_energy(:)
-! pt2_E0_denominator(:) = HF_energy - nuclear_repulsion
-! pt2_E0_denominator(:) = barycentric_electronic_energy(:)
+ pt2_E0_denominator(1:N_states) = CI_electronic_energy(1:N_states)
+! pt2_E0_denominator(1:N_states) = HF_energy - nuclear_repulsion
+! pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
  call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator')
 END_PROVIDER
 
diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f
index 8b9488d2..382e8652 100644
--- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f
+++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f
@@ -5,11 +5,16 @@ program fci_zmq
   
   double precision, allocatable  :: pt2(:)
   integer                        :: degree
+  integer                        :: n_det_before, to_select
+  double precision               :: threshold_davidson_in
   
   allocate (pt2(N_states))
   
   pt2 = 1.d0
   diag_algorithm = "Lapack"
+  threshold_davidson_in = threshold_davidson
+  SOFT_TOUCH threshold_davidson
+  threshold_davidson = 1.d-4
   
   if (N_det > N_det_max) then
     call diagonalize_CI
@@ -33,18 +38,25 @@ program fci_zmq
   double precision               :: E_CI_before(N_states)
   
   
-  integer                        :: n_det_before
   print*,'Beginning the selection ...'
   E_CI_before(1:N_states) = CI_energy(1:N_states)
+  n_det_before = 0
   
   do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) )
     n_det_before = N_det
-    call ZMQ_selection(max(1024-N_det, N_det), pt2)
+    to_select = 3*N_det
+    to_select = max(1024-to_select, to_select)
+    to_select = min(to_select, N_det_max-n_det_before)
+    call ZMQ_selection(to_select, pt2)
     
     PROVIDE  psi_coef
     PROVIDE  psi_det
     PROVIDE  psi_det_sorted
 
+    if (N_det == N_det_max) then
+      threshold_davidson = threshold_davidson_in
+      SOFT_TOUCH threshold_davidson
+    endif
     call diagonalize_CI
     call save_wavefunction
     
@@ -137,9 +149,9 @@ subroutine ZMQ_selection(N_in, pt2)
 
   step = int(5000000.d0 / dble(N_int * N_states * elec_num * elec_num * mo_tot_num * mo_tot_num ))
   step = max(1,step)
-  do i= N_det_generators, 1, -step
-    i_generator_start = max(i-step+1,1)
-    i_generator_max = i
+  do i= 1, N_det_generators,step
+    i_generator_start = i
+    i_generator_max = min(i+step-1,N_det_generators)
     write(task,*) i_generator_start, i_generator_max, 1, N
     call add_task_to_taskserver(zmq_to_qp_run_socket,task)
   end do
diff --git a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
index 36550116..dfaee629 100644
--- a/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/run_selection_slave.irp.f
@@ -4,7 +4,7 @@ subroutine run_selection_slave(thread,iproc,energy)
   use selection_types
   implicit none
 
-  double precision, intent(in)    :: energy(N_states_diag)
+  double precision, intent(in)    :: energy(N_states)
   integer,  intent(in)            :: thread, iproc
   integer                        :: rc, i
 
diff --git a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
index 5041e731..d6204cc3 100644
--- a/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_davidson_slave.irp.f
@@ -22,7 +22,7 @@ subroutine run_wf
 
   integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR) :: zmq_to_qp_run_socket
-  double precision :: energy(N_states_diag)
+  double precision :: energy(N_states)
   character*(64) :: states(2)
   integer :: rc, i
   
@@ -48,7 +48,7 @@ subroutine run_wf
       ! ---------
 
       print *,  'Selection'
-      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag)
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
   
       !$OMP PARALLEL PRIVATE(i)
       i = omp_get_thread_num()
@@ -76,7 +76,7 @@ end
 
 subroutine update_energy(energy)
   implicit none
-  double precision, intent(in) :: energy(N_states_diag)
+  double precision, intent(in) :: energy(N_states)
   BEGIN_DOC
 ! Update energy when it is received from ZMQ
   END_DOC
@@ -88,7 +88,7 @@ subroutine update_energy(energy)
   enddo
   call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int)
   if (.True.) then
-    do k=1,size(ci_electronic_energy)
+    do k=1,N_states
       ci_electronic_energy(k) = energy(k)
     enddo
     TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors
@@ -99,7 +99,7 @@ end
 
 subroutine selection_slave_tcp(i,energy)
   implicit none
-  double precision, intent(in) :: energy(N_states_diag)
+  double precision, intent(in) :: energy(N_states)
   integer, intent(in)            :: i
 
   call run_selection_slave(0,i,energy)
diff --git a/plugins/Full_CI_ZMQ/selection_slave.irp.f b/plugins/Full_CI_ZMQ/selection_slave.irp.f
index b9e530e0..657ad63c 100644
--- a/plugins/Full_CI_ZMQ/selection_slave.irp.f
+++ b/plugins/Full_CI_ZMQ/selection_slave.irp.f
@@ -22,7 +22,7 @@ subroutine run_wf
 
   integer(ZMQ_PTR), external :: new_zmq_to_qp_run_socket
   integer(ZMQ_PTR) :: zmq_to_qp_run_socket
-  double precision :: energy(N_states_diag)
+  double precision :: energy(N_states)
   character*(64) :: states(1)
   integer :: rc, i
   
@@ -47,7 +47,7 @@ subroutine run_wf
       ! ---------
 
       print *,  'Selection'
-      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states_diag)
+      call zmq_get_psi(zmq_to_qp_run_socket,1,energy,N_states)
   
       !$OMP PARALLEL PRIVATE(i)
       i = omp_get_thread_num()
@@ -62,7 +62,7 @@ end
 
 subroutine update_energy(energy)
   implicit none
-  double precision, intent(in) :: energy(N_states_diag)
+  double precision, intent(in) :: energy(N_states)
   BEGIN_DOC
 ! Update energy when it is received from ZMQ
   END_DOC
@@ -74,7 +74,7 @@ subroutine update_energy(energy)
   enddo
   call u_0_S2_u_0(CI_eigenvectors_s2,CI_eigenvectors,N_det,psi_det,N_int)
   if (.True.) then
-    do k=1,size(ci_electronic_energy)
+    do k=1,N_states
       ci_electronic_energy(k) = energy(k)
     enddo
     TOUCH ci_electronic_energy CI_eigenvectors_s2 CI_eigenvectors
@@ -85,7 +85,7 @@ end
 
 subroutine selection_slave_tcp(i,energy)
   implicit none
-  double precision, intent(in) :: energy(N_states_diag)
+  double precision, intent(in) :: energy(N_states)
   integer, intent(in)            :: i
 
   call run_selection_slave(0,i,energy)
diff --git a/plugins/MP2/mp2.irp.f b/plugins/MP2/mp2.irp.f
index 3a049f7b..d4721c71 100644
--- a/plugins/MP2/mp2.irp.f
+++ b/plugins/MP2/mp2.irp.f
@@ -1,4 +1,10 @@
 program mp2
+  no_vvvv_integrals = .True.
+  SOFT_TOUCH no_vvvv_integrals
+  call run
+end
+
+subroutine run
   implicit none
   double precision, allocatable  :: pt2(:), norm_pert(:)
   double precision               :: H_pert_diag, E_old
diff --git a/plugins/MP2/mp2_wf.irp.f b/plugins/MP2/mp2_wf.irp.f
index 5efbb9cd..e7419319 100644
--- a/plugins/MP2/mp2_wf.irp.f
+++ b/plugins/MP2/mp2_wf.irp.f
@@ -1,4 +1,10 @@
 program mp2_wf
+  no_vvvv_integrals = .True.
+  SOFT_TOUCH no_vvvv_integrals
+  call run
+end
+
+subroutine run
   implicit none
   BEGIN_DOC
 ! Save the MP2 wave function
diff --git a/plugins/MRCC_Utils/amplitudes.irp.f b/plugins/MRCC_Utils/amplitudes.irp.f
index 053527f7..72d3ea67 100644
--- a/plugins/MRCC_Utils/amplitudes.irp.f
+++ b/plugins/MRCC_Utils/amplitudes.irp.f
@@ -89,7 +89,7 @@ END_PROVIDER
      !$OMP shared(is_active_exc, active_hh_idx, active_pp_idx, n_exc_active)&
      !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh, s)
  allocate(lref(N_det_non_ref))
- !$OMP DO schedule(static,10)
+ !$OMP DO schedule(dynamic)
  do ppp=1,n_exc_active
    active_excitation_to_determinants_val(:,:,ppp) = 0d0
    active_excitation_to_determinants_idx(:,ppp)   = 0
@@ -191,6 +191,15 @@ END_PROVIDER
         end if
       end do
 
+      if (a_col == at_row) then
+        t(:) = t(:) + 1.d0
+      endif
+      if (sum(dabs(t(:))) > 0.d0) then
+        wk = wk+1
+        A_ind_mwen(wk) = a_col
+        A_val_mwen(:,wk) = t(:)
+      endif
+
     end do
 
     if(wk /= 0) then
diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f
index 9d5e8a67..e667d255 100644
--- a/plugins/MRCC_Utils/davidson.irp.f
+++ b/plugins/MRCC_Utils/davidson.irp.f
@@ -628,7 +628,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   integer                        :: k_pairs, kl
   
   integer                        :: iter2
-  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:)
+  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:), overlap(:,:)
   double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
   double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
   double precision               :: diag_h_mat_elem
@@ -640,8 +640,10 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   include 'constants.include.F'
   
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
-  if (N_st_diag > sze) then
-     stop 'error in Davidson : N_st_diag > sze'
+  if (N_st_diag*3 > sze) then
+     print *,  'error in Davidson :'
+     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+     stop -1
   endif
 
   PROVIDE nuclear_repulsion
@@ -666,7 +668,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = ' Iter'
   do i=1,N_st
-    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual'
+    write_buffer = trim(write_buffer)//'      Energy          S^2       Residual '
   enddo
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = '===== '
@@ -678,26 +680,19 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   integer, external :: align_double
   sze_8 = align_double(sze)
 
-  double precision :: delta
-
-  if (s2_eig) then
-    delta = 1.d0
-  else
-    delta = 0.d0
-  endif
-  
   itermax = min(davidson_sze_max, sze/N_st_diag)
   allocate(                                                          &
-      W(sze_8,N_st_diag*itermax),                           &
-      U(sze_8,N_st_diag*itermax),                           &
-      S(sze_8,N_st_diag*itermax),                           &
-      h(N_st_diag*itermax,N_st_diag*itermax),      &
-      y(N_st_diag*itermax,N_st_diag*itermax),      &
-      s_(N_st_diag*itermax,N_st_diag*itermax),     &
-      s_tmp(N_st_diag*itermax,N_st_diag*itermax),  &
+      W(sze_8,N_st_diag*itermax),                                    &
+      U(sze_8,N_st_diag*itermax),                                    &
+      S(sze_8,N_st_diag*itermax),                                    &
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
       residual_norm(N_st_diag),                                      &
-      c(N_st_diag*itermax),                                 &
-      s2(N_st_diag*itermax),                                &
+      c(N_st_diag*itermax),                                          &
+      s2(N_st_diag*itermax),                                         &
+      overlap(N_st_diag*itermax,N_st_diag*itermax),                  &
       lambda(N_st_diag*itermax))
 
   h  = 0.d0
@@ -721,24 +716,18 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   converged = .False.
   
   double precision               :: r1, r2
-  do k=N_st+1,N_st_diag-2,2
+  do k=N_st+1,N_st_diag
+      u_in(k,k) = 10.d0
       do i=1,sze
         call random_number(r1)
         call random_number(r2)
         r1 = dsqrt(-2.d0*dlog(r1))
         r2 = dtwo_pi*r2
         u_in(i,k) = r1*dcos(r2)
-        u_in(i,k+1) = r1*dsin(r2)
       enddo
   enddo
-  do k=N_st_diag-1,N_st_diag
-      do i=1,sze
-        call random_number(r1)
-        call random_number(r2)
-        r1 = dsqrt(-2.d0*dlog(r1))
-        r2 = dtwo_pi*r2
-        u_in(i,k) = r1*dcos(r2)
-      enddo
+  do k=1,N_st_diag
+      call normalize(u_in(1,k),sze)
   enddo
 
   
@@ -776,6 +765,45 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
           1.d0, U, size(U,1), S, size(S,1),                &
           0.d0, s_, size(s_,1))
 
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
+      
       ! Diagonalize h
       ! -------------
       call lapack_diag(lambda,y,h,size(h,1),shift2)
@@ -796,24 +824,73 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
       enddo
 
       if (s2_eig) then
-        logical :: state_ok(N_st_diag*davidson_sze_max)
+          logical :: state_ok(N_st_diag*davidson_sze_max)
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        state_ok(k) = .True.
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        ! Compute overlap with U_in
+        ! -------------------------
+        
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+        overlap = -1.d0
         do k=1,shift2
-          state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
         enddo
-        do k=1,shift2
-          if (.not. state_ok(k)) then
-            do l=k+1,shift2
-              if (state_ok(l)) then
-                call dswap(shift2, y(1,k), 1, y(1,l), 1)
-                call dswap(1, s2(k), 1, s2(l), 1)
-                call dswap(1, lambda(k), 1, lambda(l), 1)
-                state_ok(k) = .True.
-                state_ok(l) = .False.
-                exit
-              endif
-            enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,shift2
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
           endif
         enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
       endif
 
 
@@ -831,11 +908,31 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
       ! -----------------------
 
       do k=1,N_st_diag
-        do i=1,sze
-          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
-             )/max(H_jj(i) - lambda (k),1.d-2)
-        enddo
+        if (state_ok(k)) then
+          do i=1,sze
+            U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+                * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+              )/max(H_jj(i) - lambda (k),1.d-2)
+          enddo
+        else
+          ! Randomize components with bad <S2>
+            do i=1,sze-2,2
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+              U(i+1,shift2+k) = r1*dsin(r2)
+            enddo
+            do i=sze-2+1,sze
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+            enddo
+        endif
+
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
           to_print(1,k) = lambda(k) + nuclear_repulsion
@@ -858,20 +955,16 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 
     enddo
 
-    if (.not.converged) then
-      iter = itermax-1
-    endif
-
     ! Re-contract to u_in
     ! -----------
     
-    do k=1,N_st_diag
-      energies(k) = lambda(k)
-    enddo
+    call dgemm('N','N', sze, N_st_diag, shift2,                    &
+        1.d0, U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
 
-    call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0,      &
-        U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+  enddo
 
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
   enddo
 
   write_buffer = '===== '
@@ -884,7 +977,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 
   deallocate (                                                       &
       W, residual_norm,                                              &
-      U,                                                             &
+      U, overlap,                                                    &
       c, S,                                                       &
       h,                                                             &
       y, s_, s_tmp,                                                  &
@@ -950,7 +1043,7 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
       !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, &
-      !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in)
+      !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in)
   allocate(vt(N_st_8,n),st(N_st_8,n))
   Vt = 0.d0
   St = 0.d0
@@ -1035,6 +1128,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
         do istate=1,N_st
           vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j)
           vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i)
+          st (istate,i) = st (istate,i) + delta_ij_s2(istate_in,jj,ii)*ut(istate,j)
+          st (istate,j) = st (istate,j) + delta_ij_s2(istate_in,jj,ii)*ut(istate,i)
         enddo
     enddo
   enddo
diff --git a/plugins/MRCC_Utils/mrcc_dummy.irp.f b/plugins/MRCC_Utils/mrcc_dummy.irp.f
deleted file mode 100644
index 8f1deda8..00000000
--- a/plugins/MRCC_Utils/mrcc_dummy.irp.f
+++ /dev/null
@@ -1,4 +0,0 @@
-program pouet
-
-
-end
diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f
index 7005fa19..0540eed9 100644
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@@ -77,18 +77,18 @@ BEGIN_PROVIDER [ double precision, hij_mrcc, (N_det_non_ref,N_det_ref) ]
 
 END_PROVIDER
 
- BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
-&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ]
- implicit none
- BEGIN_DOC
- ! Dressing matrix in N_det basis
- END_DOC
- integer :: i,j,m
- delta_ij = 0.d0
- delta_ii = 0.d0
- call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref)
-
-END_PROVIDER
+! BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
+!&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ]
+! implicit none
+! BEGIN_DOC
+! ! Dressing matrix in N_det basis
+! END_DOC
+! integer :: i,j,m
+! delta_ij = 0.d0
+! delta_ii = 0.d0
+! call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref)
+!
+!END_PROVIDER
              
 
 BEGIN_PROVIDER [ double precision, h_matrix_dressed, (N_det,N_det,N_states) ]
@@ -139,7 +139,6 @@ END_PROVIDER
 
    integer :: mrcc_state 
    
-   mrcc_state = N_states
    do j=1,min(N_states,N_det)
      do i=1,N_det
        CI_eigenvectors_dressed(i,j) = psi_coef(i,j)
@@ -148,16 +147,33 @@ END_PROVIDER
    
    if (diag_algorithm == "Davidson") then
      
-!     call davidson_diag_mrcc(psi_det,CI_eigenvectors_dressed,CI_electronic_energy_dressed,&
-!          size(CI_eigenvectors_dressed,1),N_det,N_states,N_states_diag,N_int,output_determinants,mrcc_state)
-
-     call davidson_diag_mrcc_HS2(psi_det,CI_eigenvectors_dressed,&
-          size(CI_eigenvectors_dressed,1), &
-          CI_electronic_energy_dressed,N_det,N_states,N_states_diag,N_int, &
-          output_determinants,mrcc_state)
-
-     call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,&
-         N_states_diag,size(CI_eigenvectors_dressed,1))
+     allocate (eigenvectors(size(CI_eigenvectors_dressed,1),size(CI_eigenvectors_dressed,2)), &
+     eigenvalues(size(CI_electronic_energy_dressed,1)))
+     do j=1,min(N_states,N_det)
+       do i=1,N_det
+         eigenvectors(i,j) = psi_coef(i,j)
+       enddo
+     enddo
+     do mrcc_state=1,N_states
+      do j=mrcc_state,min(N_states,N_det)
+        do i=1,N_det
+          eigenvectors(i,j) = psi_coef(i,j)
+        enddo
+      enddo
+      call davidson_diag_mrcc_HS2(psi_det,eigenvectors,&
+            size(eigenvectors,1), &
+            eigenvalues,N_det,N_states,N_states_diag,N_int, &
+            output_determinants,mrcc_state)
+      CI_eigenvectors_dressed(1:N_det,mrcc_state) = eigenvectors(1:N_det,mrcc_state)
+      CI_electronic_energy_dressed(mrcc_state) = eigenvalues(mrcc_state)
+   enddo
+   do k=N_states+1,N_states_diag
+     CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k)
+     CI_electronic_energy_dressed(k) = eigenvalues(k)
+   enddo
+   call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,&
+          N_states_diag,size(CI_eigenvectors_dressed,1))
+   deallocate (eigenvectors,eigenvalues)
 
      
    else if (diag_algorithm == "Lapack") then
@@ -628,12 +644,12 @@ END_PROVIDER
   double precision               :: phase
   
   
-  double precision, allocatable :: rho_mrcc_init(:,:)
+  double precision, allocatable :: rho_mrcc_init(:)
   integer                        :: a_coll, at_roww
   
   print *, "TI", hh_nex, N_det_non_ref
 
-  allocate(rho_mrcc_init(N_det_non_ref, N_states))
+  allocate(rho_mrcc_init(N_det_non_ref))
   allocate(x_new(hh_nex))
   allocate(x(hh_nex), AtB(hh_nex))
   x = 0d0
@@ -644,9 +660,8 @@ END_PROVIDER
     AtB(:) = 0.d0
     !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, active_excitation_to_determinants_idx,&
         !$OMP   active_excitation_to_determinants_val, x, N_det_ref, hh_nex, N_det_non_ref)          &
-        !$OMP private(at_row, a_col, t, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
+        !$OMP private(at_row, a_col, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
         !$OMP shared(N_states,mrcc_col_shortcut, mrcc_N_col, AtB, mrcc_AtA_val, mrcc_AtA_ind, s, n_exc_active, active_pp_idx)
-    allocate(A_val_mwen(N_states,hh_nex), A_ind_mwen(hh_nex), t(N_states))
     
     !$OMP DO schedule(dynamic, 100)
     do at_roww = 1, n_exc_active ! hh_nex
@@ -655,11 +670,11 @@ END_PROVIDER
           AtB(at_row) = AtB(at_row) + psi_non_ref_coef(active_excitation_to_determinants_idx(i, at_roww), s) * active_excitation_to_determinants_val(s,i, at_roww)
       end do
     end do
-    !$OMP END DO NOWAIT
-    deallocate (A_ind_mwen, A_val_mwen)
+    !$OMP END DO
+   
     !$OMP END PARALLEL
 
-    x = 0d0
+    X(:) = 0d0
     
     
     do a_coll = 1, n_exc_active
@@ -669,14 +684,12 @@ END_PROVIDER
     
     rho_mrcc_init = 0d0
     
-    !$OMP PARALLEL default(shared) &
-        !$OMP private(lref, hh, pp, II, myMask, myDet, ok, ind, phase)
     allocate(lref(N_det_ref))
-    !$OMP DO schedule(static, 1)
     do hh = 1, hh_shortcut(0)
       do pp = hh_shortcut(hh), hh_shortcut(hh+1)-1
         if(is_active_exc(pp)) cycle
         lref = 0
+        AtB(pp) = 0.d0
         do II=1,N_det_ref
           call apply_hole_local(psi_ref(1,1,II), hh_exists(1, hh), myMask, ok, N_int)
           if(.not. ok) cycle
@@ -686,39 +699,36 @@ END_PROVIDER
           if(ind == -1) cycle
           ind = psi_non_ref_sorted_idx(ind)
           call get_phase(myDet(1,1), psi_ref(1,1,II), phase, N_int)
-          X(pp) += psi_ref_coef(II,s)**2
           AtB(pp) += psi_non_ref_coef(ind, s) * psi_ref_coef(II, s) * phase
           lref(II) = ind
-          if(phase < 0d0) lref(II) = -ind
+          if(phase < 0.d0) lref(II) = -ind
         end do
-        X(pp) =  AtB(pp) / X(pp)
+        X(pp) =  AtB(pp) 
         do II=1,N_det_ref
           if(lref(II) > 0) then
-            rho_mrcc_init(lref(II),s) = psi_ref_coef(II,s) * X(pp)
+            rho_mrcc_init(lref(II)) = psi_ref_coef(II,s) * X(pp)
           else if(lref(II) < 0) then
-            rho_mrcc_init(-lref(II),s) = -psi_ref_coef(II,s) * X(pp)
+            rho_mrcc_init(-lref(II)) = -psi_ref_coef(II,s) * X(pp)
           end if
         end do
       end do
     end do
-    !$OMP END DO
     deallocate(lref)
-    !$OMP END PARALLEL
-    
+
     x_new = x
     
     double precision               :: factor, resold
     factor = 1.d0
     resold = huge(1.d0)
 
-    do k=0,100000
+    do k=0,hh_nex*hh_nex
       !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll)
       
       !$OMP DO
       do i=1,N_det_non_ref
-        rho_mrcc(i,s) = rho_mrcc_init(i,s) 
+        rho_mrcc(i,s) = rho_mrcc_init(i) 
       enddo
-      !$OMP END DO
+      !$OMP END DO NOWAIT
       
       !$OMP DO
       do a_coll = 1, n_exc_active
@@ -746,15 +756,15 @@ END_PROVIDER
         X(a_col) = X_new(a_col)
       end do
       if (res > resold) then
-        factor = -factor * 0.5d0
+        factor = factor * 0.5d0
       endif
       resold = res
       
-      if(mod(k, 100) == 0) then
+      if(iand(k, 4095) == 0) then
         print *, "res ", k, res
       end if
       
-      if(res < 1d-9) exit
+      if(res < 1d-12) exit
     end do
     
     norm = 0.d0
@@ -917,6 +927,9 @@ END_PROVIDER
 
      norm = norm*f
      print *,  'norm of |T Psi_0> = ', dsqrt(norm)
+     if (dsqrt(norm) > 1.d0) then
+       stop 'Error : Norm of the SD larger than the norm of the reference.'
+     endif
 
      do i=1,N_det_ref
        norm = norm + psi_ref_coef(i,s)*psi_ref_coef(i,s)
@@ -928,7 +941,7 @@ END_PROVIDER
      ! rho_mrcc now contains the product of the scaling factors and the
      ! normalization constant
     
-    dIj_unique(:size(X), s) = X(:)
+    dIj_unique(1:size(X), s) = X(1:size(X))
   end do
 
 END_PROVIDER
@@ -940,17 +953,14 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
   integer :: s,i,j
   double precision, external :: get_dij_index
   print *, "computing amplitudes..."
-  !$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j)
   do s=1, N_states
-    !$OMP DO
     do i=1, N_det_non_ref
       do j=1, N_det_ref
+        !DIR$ FORCEINLINE
         dij(j, i, s) = get_dij_index(j, i, s, N_int)
       end do
     end do
-    !$OMP END DO 
   end do
-  !$OMP END PARALLEL
   print *, "done computing amplitudes"
 END_PROVIDER
 
diff --git a/plugins/Psiref_Utils/psi_ref_utils.irp.f b/plugins/Psiref_Utils/psi_ref_utils.irp.f
index 41db2f10..c4147ebc 100644
--- a/plugins/Psiref_Utils/psi_ref_utils.irp.f
+++ b/plugins/Psiref_Utils/psi_ref_utils.irp.f
@@ -97,6 +97,10 @@ END_PROVIDER
    endif
  enddo
  N_det_non_ref = i_non_ref
+ if (N_det_non_ref < 1) then
+   print *,  'Error : All determinants are in the reference'
+   stop -1
+ endif
 END_PROVIDER
 
  BEGIN_PROVIDER [ integer(bit_kind), psi_non_ref_restart,  (N_int,2,psi_det_size) ]
diff --git a/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
new file mode 100644
index 00000000..8b137891
--- /dev/null
+++ b/plugins/Selectors_CASSD/NEEDED_CHILDREN_MODULES
@@ -0,0 +1 @@
+
diff --git a/plugins/Selectors_CASSD/README.rst b/plugins/Selectors_CASSD/README.rst
new file mode 100644
index 00000000..19b4ec2b
--- /dev/null
+++ b/plugins/Selectors_CASSD/README.rst
@@ -0,0 +1,12 @@
+===============
+Selectors_CASSD
+===============
+
+Needed Modules
+==============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
+Documentation
+=============
+.. Do not edit this section It was auto-generated
+.. by the `update_README.py` script.
diff --git a/plugins/Selectors_CASSD/selectors.irp.f b/plugins/Selectors_CASSD/selectors.irp.f
new file mode 100644
index 00000000..ab36527d
--- /dev/null
+++ b/plugins/Selectors_CASSD/selectors.irp.f
@@ -0,0 +1,95 @@
+use bitmasks
+
+BEGIN_PROVIDER [ integer, psi_selectors_size ]
+ implicit none
+ psi_selectors_size = psi_det_size
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, N_det_selectors]
+  implicit none
+  BEGIN_DOC
+  ! For Single reference wave functions, the number of selectors is 1 : the
+  ! Hartree-Fock determinant
+  END_DOC
+  N_det_selectors = N_det
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer(bit_kind), psi_selectors, (N_int,2,psi_selectors_size) ]
+&BEGIN_PROVIDER [ double precision, psi_selectors_coef, (psi_selectors_size,N_states) ]
+  implicit none
+  BEGIN_DOC
+  ! Determinants on which we apply <i|H|psi> for perturbation.
+  END_DOC
+  integer                        :: i, k, l, m
+  logical                        :: good
+
+  do i=1,N_det_generators
+    do k=1,N_int
+      psi_selectors(k,1,i) = psi_det_generators(k,1,i)
+      psi_selectors(k,2,i) = psi_det_generators(k,2,i)
+    enddo
+  enddo
+  do k=1,N_states
+    do i=1,N_det_selectors
+      psi_selectors_coef(i,k) = psi_coef_generators(i,k)
+    enddo
+  enddo
+
+  m=N_det_generators
+
+  do i=1,N_det
+    do l=1,n_cas_bitmask
+      good = .True.
+      do k=1,N_int
+        good = good .and. (                                         &
+            iand(not(cas_bitmask(k,1,l)), psi_det_sorted(k,1,i)) ==         &
+            iand(not(cas_bitmask(k,1,l)), HF_bitmask(k,1)) .and. (   &
+            iand(not(cas_bitmask(k,2,l)), psi_det_sorted(k,2,i)) ==         &
+            iand(not(cas_bitmask(k,2,l)), HF_bitmask(k,2) )) )
+      enddo
+      if (good) then
+        exit
+      endif
+    enddo
+    if (.not.good) then
+      m = m+1
+      do k=1,N_int
+        psi_selectors(k,1,m) = psi_det_sorted(k,1,i)
+        psi_selectors(k,2,m) = psi_det_sorted(k,2,i)
+      enddo
+      psi_selectors_coef(m,:) = psi_coef_sorted(i,:)
+    endif
+  enddo
+  if (N_det /= m) then
+    print *,  N_det, m
+    stop 'N_det /= m'
+  endif
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_coef_transp, (N_states,psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Transposed psi_selectors
+  END_DOC
+  integer                        :: i,k
+
+  do i=1,N_det_selectors
+    do k=1,N_states
+      psi_selectors_coef_transp(k,i) = psi_selectors_coef(i,k) 
+    enddo
+  enddo
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, psi_selectors_diag_h_mat, (psi_selectors_size) ]
+  implicit none
+  BEGIN_DOC
+  ! Diagonal elements of the H matrix for each selectors 
+  END_DOC
+  integer                        :: i
+  double precision :: diag_H_mat_elem
+  do i = 1, N_det_selectors
+   psi_selectors_diag_h_mat(i) = diag_H_mat_elem(psi_selectors(1,1,i),N_int)
+  enddo
+END_PROVIDER
+
+
diff --git a/plugins/Selectors_CASSD/zmq.irp.f b/plugins/Selectors_CASSD/zmq.irp.f
new file mode 100644
index 00000000..4359a876
--- /dev/null
+++ b/plugins/Selectors_CASSD/zmq.irp.f
@@ -0,0 +1,122 @@
+subroutine zmq_put_psi(zmq_to_qp_run_socket,worker_id, energy, size_energy)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Put the wave function on the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer, intent(in)            :: size_energy
+  double precision, intent(out)  :: energy(size_energy)
+  integer                        :: rc
+  character*(256)                :: msg
+
+  write(msg,*) 'put_psi ', worker_id, N_states, N_det, psi_det_size, n_det_generators, n_det_selectors
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)
+  if (rc /= len(trim(msg))) then
+    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),ZMQ_SNDMORE)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
+  if (rc /= N_int*2*N_det*bit_kind) then
+    print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)
+  if (rc /= psi_det_size*N_states*8) then
+    print *, 'f77_zmq_send(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)
+  if (rc /= size_energy*8) then
+    print *, 'f77_zmq_send(zmq_to_qp_run_socket,energy,size_energy*8,0)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:rc) /= 'put_psi_reply 1') then
+    print *,  rc, trim(msg)
+    print *,  'Error in put_psi_reply'
+    stop 'error'
+  endif
+
+end
+
+
+
+subroutine zmq_get_psi(zmq_to_qp_run_socket, worker_id, energy, size_energy)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC
+! Get the wave function from the qp_run scheduler
+  END_DOC
+  integer(ZMQ_PTR), intent(in)   :: zmq_to_qp_run_socket
+  integer, intent(in)            :: worker_id
+  integer, intent(in)            :: size_energy
+  double precision, intent(out)  :: energy(size_energy)
+  integer                        :: rc
+  character*(64)                 :: msg
+
+  write(msg,*) 'get_psi ', worker_id
+
+  rc = f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)
+  if (rc /= len(trim(msg))) then
+    print *, 'f77_zmq_send(zmq_to_qp_run_socket,trim(msg),len(trim(msg)),0)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,msg,len(msg),0)
+  if (msg(1:13) /= 'get_psi_reply') then
+    print *,  rc, trim(msg)
+    print *,  'Error in get_psi_reply'
+    stop 'error'
+  endif
+  
+  integer :: N_states_read, N_det_read, psi_det_size_read
+  integer :: N_det_selectors_read, N_det_generators_read
+  read(msg(14:rc),*) rc, N_states_read, N_det_read, psi_det_size_read, &
+    N_det_generators_read, N_det_selectors_read
+  if (rc /= worker_id) then
+    print *,  'Wrong worker ID'
+    stop 'error'
+  endif
+
+  N_states = N_states_read
+  N_det    = N_det_read
+  psi_det_size = psi_det_size_read
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)
+  if (rc /= N_int*2*N_det*bit_kind) then
+    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,psi_det,N_int*2*N_det*bit_kind,ZMQ_SNDMORE)'
+    stop 'error'
+  endif
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)
+  if (rc /= psi_det_size*N_states*8) then
+    print *, '77_zmq_recv(zmq_to_qp_run_socket,psi_coef,psi_det_size*N_states*8,ZMQ_SNDMORE)'
+    stop 'error'
+  endif
+  TOUCH psi_det_size N_det N_states psi_det psi_coef
+
+  rc = f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)
+  if (rc /= size_energy*8) then
+    print *, 'f77_zmq_recv(zmq_to_qp_run_socket,energy,size_energy*8,0)'
+    stop 'error'
+  endif
+
+  if (N_det_generators_read > 0) then
+    N_det_generators = N_det_generators_read
+    TOUCH N_det_generators
+  endif
+  if (N_det_selectors_read > 0) then
+    N_det_selectors  = N_det_selectors_read
+    TOUCH N_det_selectors
+  endif
+
+end
+
+
diff --git a/plugins/loc_cele/loc_exchange_int.irp.f b/plugins/loc_cele/loc_exchange_int.irp.f
index d7cc5c65..8bb47d89 100644
--- a/plugins/loc_cele/loc_exchange_int.irp.f
+++ b/plugins/loc_cele/loc_exchange_int.irp.f
@@ -14,7 +14,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_core_inact_orb
    jorb = list_core_inact(j)
    iorder(jorb) = jorb
@@ -46,7 +46,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_act_orb
    jorb = list_act(j)
    iorder(jorb) = jorb
@@ -78,7 +78,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_virt_orb
    jorb = list_virt(j)
    iorder(jorb) = jorb
diff --git a/plugins/loc_cele/loc_exchange_int_act.irp.f b/plugins/loc_cele/loc_exchange_int_act.irp.f
index b9bbeb82..f332dd5d 100644
--- a/plugins/loc_cele/loc_exchange_int_act.irp.f
+++ b/plugins/loc_cele/loc_exchange_int_act.irp.f
@@ -15,7 +15,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_act_orb
    jorb = list_act(j)
    iorder(jorb) = jorb
diff --git a/plugins/loc_cele/loc_exchange_int_inact.irp.f b/plugins/loc_cele/loc_exchange_int_inact.irp.f
index 2ff3c85f..fcf20ced 100644
--- a/plugins/loc_cele/loc_exchange_int_inact.irp.f
+++ b/plugins/loc_cele/loc_exchange_int_inact.irp.f
@@ -14,7 +14,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_core_inact_orb
    jorb = list_core_inact(j)
    iorder(jorb) = jorb
diff --git a/plugins/loc_cele/loc_exchange_int_virt.irp.f b/plugins/loc_cele/loc_exchange_int_virt.irp.f
index 333f189b..8302b5d2 100644
--- a/plugins/loc_cele/loc_exchange_int_virt.irp.f
+++ b/plugins/loc_cele/loc_exchange_int_virt.irp.f
@@ -15,7 +15,7 @@ program loc_int
   exchange_int = 0.d0
   iorder = 0
   print*,''
-  if(list_core_inact_check(iorb) == .False.)cycle
+  if(list_core_inact_check(iorb) .eqv. .False.)cycle
   do j = i+1, n_virt_orb
    jorb = list_virt(j)
    iorder(jorb) = jorb
diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f
index 3646b0b2..0c67ab99 100644
--- a/plugins/mrcepa0/dressing.irp.f
+++ b/plugins/mrcepa0/dressing.irp.f
@@ -4,6 +4,8 @@ use bitmasks
 
  BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ]
   use bitmasks
   implicit none
   integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc
@@ -14,11 +16,13 @@ use bitmasks
   
   delta_ij_mrcc = 0d0
   delta_ii_mrcc = 0d0
-  print *, "Dij", dij(1,1,1)
+  delta_ij_s2_mrcc = 0d0
+  delta_ii_s2_mrcc = 0d0
+  PROVIDE dij
   provide hh_shortcut psi_det_size! lambda_mrcc
   !$OMP PARALLEL DO default(none)  schedule(dynamic) &
   !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) &
-  !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) &
+  !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) &
   !$OMP private(h, n, mask, omask, buf, ok, iproc)
   do gen= 1, N_det_generators
     allocate(buf(N_int, 2, N_det_non_ref))
@@ -37,7 +41,9 @@ use bitmasks
       end do
       n = n - 1
 
-      if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask)
+      if(n /= 0) then
+        call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask)
+      endif
 
     end do
     deallocate(buf)
@@ -52,13 +58,15 @@ END_PROVIDER
 ! end subroutine
 
 
-subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffer,Nint,key_mask)
+subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask)
  use bitmasks
  implicit none
 
   integer, intent(in)            :: i_generator,n_selected, Nint
   double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref)
   double precision, intent(inout) :: delta_ii_(N_states,N_det_ref)
+  double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref)
+  double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref)
 
   integer(bit_kind), intent(in)  :: det_buffer(Nint,2,n_selected)
   integer                        :: i,j,k,l,m
@@ -68,8 +76,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   integer(bit_kind),allocatable  :: tq(:,:,:)
   integer                        :: N_tq, c_ref ,degree
 
-  double precision               :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states)
-  double precision, allocatable  :: dIa_hla(:,:)
+  double precision               :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states)
+  double precision, allocatable  :: dIa_hla(:,:), dIa_sla(:,:)
   double precision               :: haj, phase, phase2
   double precision               :: f(N_states), ci_inv(N_states)
   integer                        :: exc(0:2,2,2)
@@ -82,7 +90,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   integer(bit_kind),intent(in)   :: key_mask(Nint, 2)
   integer,allocatable            :: idx_miniList(:)
   integer                        :: N_miniList, ni, leng
-  double precision, allocatable  :: hij_cache(:)
+  double precision, allocatable  :: hij_cache(:), sij_cache(:)
   
   integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:)
   integer, allocatable           :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:)
@@ -92,7 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   
 
   leng = max(N_det_generators, N_det_non_ref)
-  allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref))
+  allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref))
   allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size))
   !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint)
   call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint)
@@ -117,7 +125,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   
   deallocate(microlist, idx_microlist)
   
-  allocate (dIa_hla(N_states,N_det_non_ref))
+  allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref))
   
   ! |I>
   
@@ -185,6 +193,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
     do l_sd=1,idx_alpha(0)
       k_sd = idx_alpha(l_sd)
       call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd))
+      call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd))
     enddo
     ! |I>
     do i_I=1,N_det_ref
@@ -282,31 +291,36 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
       do l_sd=1,idx_alpha(0)
         k_sd = idx_alpha(l_sd)
         hla = hij_cache(k_sd)
+        sla = sij_cache(k_sd)
 !        call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla)
         do i_state=1,N_states
           dIa_hla(i_state,k_sd) = dIa(i_state) * hla
+          dIa_sla(i_state,k_sd) = dIa(i_state) * sla
         enddo
       enddo
       call omp_set_lock( psi_ref_lock(i_I) )
       do i_state=1,N_states
-        if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then
-          do l_sd=1,idx_alpha(0)
-            k_sd = idx_alpha(l_sd)
-            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
-            delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
-          enddo
-        else
+!        if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then
+!          do l_sd=1,idx_alpha(0)
+!            k_sd = idx_alpha(l_sd)
+!            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
+!            delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
+!            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd)
+!            delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
+!          enddo
+!        else
           delta_ii_(i_state,i_I)  = 0.d0
           do l_sd=1,idx_alpha(0)
             k_sd = idx_alpha(l_sd)
             delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd)
+            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd)
           enddo
-        endif
+!        endif
       enddo
       call omp_unset_lock( psi_ref_lock(i_I) )
     enddo
   enddo
-  deallocate (dIa_hla,hij_cache)
+  deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache)
   deallocate(miniList, idx_miniList)
 end
 
@@ -315,45 +329,84 @@ end
 
  BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ]
   use bitmasks
   implicit none
-  integer :: i, j, i_state
+  integer                        :: i, j, i_state
   
   !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc
   
-  do i_state = 1, N_states
-    if(mrmode == 3) then
+  if(mrmode == 3) then
     do i = 1, N_det_ref
-      delta_ii(i_state,i)= delta_ii_mrcc(i_state,i)
+      do i_state = 1, N_states
+        delta_ii(i_state,i)= delta_ii_mrcc(i_state,i)
+        delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i)
+      enddo
       do j = 1, N_det_non_ref
-        delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i)
+        do i_state = 1, N_states
+          delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i)
+          delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i)
+        enddo
       end do
     end do
-!       
-!       do i = 1, N_det_ref
-!         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state)
-!         do j = 1, N_det_non_ref
-!           delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state)
-!         end do
-!       end do
-    else if(mrmode == 2) then
-      do i = 1, N_det_ref
+
+    ! =-=-= BEGIN STATE AVERAGE
+!    do i = 1, N_det_ref
+!      delta_ii(:,i)= delta_ii_mrcc(1,i)
+!      delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i)
+!      do i_state = 2, N_states
+!        delta_ii(:,i) += delta_ii_mrcc(i_state,i)
+!        delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i)
+!      enddo
+!      do j = 1, N_det_non_ref
+!        delta_ij(:,j,i) = delta_ij_mrcc(1,j,i)
+!        delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i)
+!        do i_state = 2, N_states
+!          delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i)
+!          delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i)
+!        enddo
+!      end do
+!    end do
+!    delta_ij = delta_ij * (1.d0/dble(N_states))
+!    delta_ii = delta_ii * (1.d0/dble(N_states))
+    ! =-=-= END STATE AVERAGE
+    !
+    !       do i = 1, N_det_ref
+    !         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state)
+    !         do j = 1, N_det_non_ref
+    !           delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state)
+    !         end do
+    !       end do
+  else if(mrmode == 2) then
+    do i = 1, N_det_ref
+      do i_state = 1, N_states
         delta_ii(i_state,i)= delta_ii_old(i_state,i)
-        do j = 1, N_det_non_ref
+        delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i)
+      enddo
+      do j = 1, N_det_non_ref
+        do i_state = 1, N_states
           delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i)
-        end do
+          delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i)
+        enddo
       end do
-    else if(mrmode == 1) then
-      do i = 1, N_det_ref
+    end do
+  else if(mrmode == 1) then
+    do i = 1, N_det_ref
+      do i_state = 1, N_states
         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state)
-        do j = 1, N_det_non_ref
+        delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state)
+      enddo
+      do j = 1, N_det_non_ref
+        do i_state = 1, N_states
           delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state)
-        end do
+          delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state)
+        enddo
       end do
-    else
-      stop "invalid mrmode"
-    end if
-  end do
+    end do
+  else
+    stop "invalid mrmode"
+  end if
 END_PROVIDER
 
 
@@ -537,28 +590,32 @@ END_PROVIDER
  
  
  BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ]
   use bitmasks
   implicit none
   integer :: i,j,k
-  double precision :: Hjk, Hki, Hij
+  double precision :: Sjk,Hjk, Hki, Hij
   !double precision, external :: get_dij
   integer i_state, degree
    
   provide lambda_mrcc dIj
   do i_state = 1, N_states
-    !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref,dij)
+    !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij)
     do i=1,N_det_ref
       do j=1,i
         call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int)
         delta_cas(i,j,i_state) = 0d0
+        delta_cas_s2(i,j,i_state) = 0d0
         do k=1,N_det_non_ref
 
           call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk)
+          call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk)
           
           delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k)
-          !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int)
+          delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k)
         end do
         delta_cas(j,i,i_state) = delta_cas(i,j,i_state)
+        delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state)
       end do
     end do
     !$OMP END PARALLEL DO
@@ -639,6 +696,8 @@ end function
 
  BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ]
 &BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ]
   use bitmasks
   implicit none
   
@@ -646,7 +705,7 @@ end function
   integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref)
   logical                         :: ok
   double precision                :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1)
-  double precision                :: contrib, contrib2,  HIIi, HJk, wall
+  double precision                :: contrib, contrib2,  contrib_s2, contrib2_s2,  HIIi, HJk, wall
   integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
   integer(bit_kind)               :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2)
   integer(bit_kind),allocatable   :: sortRef(:,:,:)
@@ -671,14 +730,16 @@ end function
   ! To provide everything
   contrib = dij(1, 1, 1)
   
-  do i_state = 1, N_states
-    delta_mrcepa0_ii(:,:) = 0d0
-    delta_mrcepa0_ij(:,:,:) = 0d0
+  delta_mrcepa0_ii(:,:) = 0d0
+  delta_mrcepa0_ij(:,:,:) = 0d0
+  delta_mrcepa0_ii_s2(:,:) = 0d0
+  delta_mrcepa0_ij_s2(:,:,:) = 0d0
 
-    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii)       &
-    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2)       &
+  do i_state = 1, N_states
+    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2)       &
+    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) &
     !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active)     &
-    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) &
+    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) &
     !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij)
     do blok=1,cepa0_shortcut(0)
     do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1
@@ -721,16 +782,21 @@ end function
 
 !          call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk)
           contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
+          contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
           
           if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
             contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
+            contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
             !$OMP ATOMIC
             delta_mrcepa0_ii(J,i_state) -= contrib2 
+            delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 
           else
             contrib = contrib * 0.5d0
+            contrib_s2 = contrib_s2 * 0.5d0
           end if
           !$OMP ATOMIC
           delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib
+          delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2
 
         end do kloop
       end do
@@ -741,7 +807,7 @@ end function
   deallocate(idx_sorted_bit)
   call wall_time(wall)
   print *, "cepa0", wall, notf
-  !stop
+
 END_PROVIDER
 
 
@@ -860,12 +926,14 @@ subroutine set_det_bit(det, p, s)
 end subroutine
 
 
-BEGIN_PROVIDER [ double precision, h_, (N_det_ref,N_det_non_ref) ]
+ BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ]
+&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ]
   implicit none
   integer :: i,j
   do i=1,N_det_ref
   do j=1,N_det_non_ref
-    call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_(i,j))
+    call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j))
+    call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j))
   end do
   end do
 END_PROVIDER
diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f
index f1d6f029..9e9fa65a 100644
--- a/plugins/mrcepa0/dressing_slave.irp.f
+++ b/plugins/mrcepa0/dressing_slave.irp.f
@@ -37,7 +37,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   integer(ZMQ_PTR), external     :: new_zmq_push_socket 
   integer(ZMQ_PTR)               :: zmq_socket_push 
 
-  double precision, allocatable  :: delta(:,:,:) 
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:)
   
 
 
@@ -47,8 +47,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   logical                         :: ok
   double precision                :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al
   double precision                :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states)
-  double precision                :: contrib, wall, iwall
-  double precision, allocatable   :: dleat(:,:,:)
+  double precision                :: contrib, contrib_s2, wall, iwall
+  double precision, allocatable   :: dleat(:,:,:), dleat_s2(:,:,:)
   integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
   integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt
   integer, external               :: get_index_in_psi_det_sorted_bit, searchDet, detCmp
@@ -63,6 +63,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
 
   allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2))
+  allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2))
   allocate(komon(0:N_det_non_ref))
 
   do 
@@ -74,10 +75,14 @@ subroutine mrsc2_dressing_slave(thread,iproc)
       cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state)
     end do
     !delta = 0.d0
+    !delta_s2 = 0.d0
     n = 0
     delta(:,0,:) = 0d0
     delta(:,:nlink(J),1) = 0d0
     delta(:,:nlink(i_I),2) = 0d0
+    delta_s2(:,0,:) = 0d0
+    delta_s2(:,:nlink(J),1) = 0d0
+    delta_s2(:,:nlink(i_I),2) = 0d0
     komon(0) = 0
     komoned = .false.
     
@@ -121,8 +126,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
           end if
           i = det_cepa0_idx(linked(m, i_I))
           
-          if(h_(J,i) == 0.d0) cycle
-          if(h_(i_I,i) == 0.d0) cycle
+          if(h_cache(J,i) == 0.d0) cycle
+          if(h_cache(i_I,i) == 0.d0) cycle
           
           !ok = .false.
           !do i_state=1, N_states
@@ -144,10 +149,13 @@ subroutine mrsc2_dressing_slave(thread,iproc)
 !           if(I_i == J) phase_Ii = phase_Ji
           
           do i_state = 1,N_states
-            dkI = h_(J,i) * dij(i_I, i, i_state)!get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,i), N_int)
-            !dkI = h_(J,i) * h_(i_I,i) * lambda_mrcc(i_state, i)
+            dkI = h_cache(J,i) * dij(i_I, i, i_state)
             dleat(i_state, kn, 1) = dkI
             dleat(i_state, kn, 2) = dkI
+
+            dkI = s2_cache(J,i) * dij(i_I, i, i_state)
+            dleat_s2(i_state, kn, 1) = dkI
+            dleat_s2(i_state, kn, 2) = dkI
           end do
 
         end do
@@ -173,26 +181,32 @@ subroutine mrsc2_dressing_slave(thread,iproc)
           !if(lambda_mrcc(i_state, i) == 0d0) cycle
           
 
-          !contrib = h_(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al
+          !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al
           contrib =  dij(i_I, k, i_state) * dleat(i_state, m, 2)
+          contrib_s2 =  dij(i_I, k, i_state) * dleat_s2(i_state, m, 2)
           delta(i_state,ll,1) += contrib
+          delta_s2(i_state,ll,1) += contrib_s2
           if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then
             delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
+            delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
           endif
           
           if(I_i == J) cycle
-          !contrib = h_(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al
+          !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al
           contrib =  dij(J, l, i_state) * dleat(i_state, m, 1)
+          contrib_s2 =  dij(J, l, i_state) * dleat_s2(i_state, m, 1)
           delta(i_state,kk,2) += contrib
+          delta_s2(i_state,kk,2) += contrib_s2
           if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
             delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
+            delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
           end if
         enddo !i_state
       end do ! while
     end do ! kk
 
       
-      call push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) 
+      call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
       call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
  
 !     end if
@@ -208,7 +222,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
 end
 
 
-subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) 
+subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
   use f77_zmq 
   implicit none 
   BEGIN_DOC 
@@ -218,6 +232,7 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id)
   integer, intent(in)            :: i_I, J
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push 
   double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
   integer, intent(in)            :: task_id 
   integer                        :: rc , i_state,  i, kk, li
   integer,allocatable            :: idx(:,:)
@@ -278,6 +293,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id)
         print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
         stop 'error' 
       endif 
+
+      rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE)  ! delta_s2(1,0,1) = delta_I   delta_s2(1,0,2) = delta_J 
+      if (rc /=  (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
       
       rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) 
       if (rc /=  n(kk)*4) then 
@@ -305,7 +326,7 @@ end
 
 
 
-subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) 
+subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) 
   use f77_zmq 
   implicit none 
   BEGIN_DOC 
@@ -315,6 +336,7 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull 
   integer, intent(out)           :: i_I, J, n(2)
   double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
   integer, intent(out)           :: task_id 
   integer                        :: rc , i, kk
   integer,intent(inout) :: idx(N_det_non_ref,2)
@@ -346,9 +368,15 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
         stop 'error' 
       endif 
       
+      rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) 
+      if (rc /= (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
+      
       rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) 
       if (rc /= n(kk)*4) then 
-        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta, n(kk)*4, ZMQ_SNDMORE)'
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)'
         stop 'error' 
       endif 
     end if
@@ -372,7 +400,7 @@ end
 
 
 
-subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
+subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_)
   use f77_zmq
   implicit none
   BEGIN_DOC 
@@ -381,11 +409,13 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
  
   double precision,intent(inout)               :: delta_ij_(N_states,N_det_non_ref,N_det_ref) 
   double precision,intent(inout)               :: delta_ii_(N_states,N_det_ref)
+  double precision,intent(inout)               :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) 
+  double precision,intent(inout)               :: delta_ii_s2_(N_states,N_det_ref)
 
 !   integer                        :: j,l
   integer                        :: rc 
    
-  double precision, allocatable  :: delta(:,:,:) 
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:) 
    
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket 
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket 
@@ -401,49 +431,47 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
   
   delta_ii_(:,:) = 0d0
   delta_ij_(:,:,:) = 0d0
+  delta_ii_s2_(:,:) = 0d0
+  delta_ij_s2_(:,:,:) = 0d0
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() 
   zmq_socket_pull = new_zmq_pull_socket() 
  
-  allocate ( delta(N_states,0:N_det_non_ref,2) ) 
+  allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) 
   
   allocate(idx(N_det_non_ref,2))
   more = 1 
   do while (more == 1) 
          
-    call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
+    call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id)
     
 
       do l=1, n(1)
         do i_state=1,N_states
           delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1)
+          delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1)
         end do
       end do
       
       do l=1, n(2)
         do i_state=1,N_states
           delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2)
+          delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2)
         end do
       end do
 
     
-!     
-!       do l=1,nlink(J)
-!         do i_state=1,N_states
-!           delta_ij_(i_state,det_cepa0_idx(linked(l,J)),i_I) += delta(i_state,l,1)
-!           delta_ij_(i_state,det_cepa0_idx(linked(l,i_I)),j) += delta(i_state,l,2)
-!         end do
-!       end do
-!      
        if(n(1) /= 0) then 
        do i_state=1,N_states
          delta_ii_(i_state,i_I) += delta(i_state,0,1)
+         delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1)
        end do
        end if
 
       if(n(2) /= 0) then 
        do i_state=1,N_states
          delta_ii_(i_state,J) += delta(i_state,0,2)
+         delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2)
        end do
        end if
 
@@ -454,7 +482,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
 
  
   enddo 
-  deallocate( delta )
+  deallocate( delta, delta_s2 )
  
   call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
   call end_zmq_pull_socket(zmq_socket_pull)
@@ -466,6 +494,8 @@ end
 
  BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ]
   implicit none
   
   integer                         :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2
@@ -574,10 +604,10 @@ end
 !  rc = pthread_create(collector_thread, mrsc2_dressing_collector)
   print *, nzer, ntot, float(nzer) / float(ntot)
   provide nproc
-  !$OMP PARALLEL DEFAULT(none)  SHARED(delta_ii_old,delta_ij_old)  PRIVATE(i) NUM_THREADS(nproc+1)
+  !$OMP PARALLEL DEFAULT(none)  SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)  PRIVATE(i) NUM_THREADS(nproc+1)
       i = omp_get_thread_num()
       if (i==0) then
-        call mrsc2_dressing_collector(delta_ii_old,delta_ij_old)
+        call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)
       else
         call mrsc2_dressing_slave_inproc(i)
       endif
diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f
index 91592e62..a5614942 100644
--- a/plugins/mrcepa0/mrcc.irp.f
+++ b/plugins/mrcepa0/mrcc.irp.f
@@ -8,8 +8,16 @@ program mrsc2sub
   
   read_wf = .True.
   SOFT_TOUCH read_wf
-  call print_cas_coefs
   call set_generators_bitmasks_as_holes_and_particles
+  if (.True.) then
+    integer :: i,j
+    do j=1,N_states
+      do i=1,N_det
+        psi_coef(i,j) = CI_eigenvectors(i,j)
+      enddo
+    enddo
+    SOFT_TOUCH psi_coef 
+  endif
   call run(N_states,energy)
   if(do_pt2_end)then
     call run_pt2(N_states,energy)
diff --git a/plugins/mrcepa0/mrcepa0.irp.f b/plugins/mrcepa0/mrcepa0.irp.f
index 34d3dec5..aeacbb39 100644
--- a/plugins/mrcepa0/mrcepa0.irp.f
+++ b/plugins/mrcepa0/mrcepa0.irp.f
@@ -8,8 +8,18 @@ program mrcepa0
   
   read_wf = .True.
   SOFT_TOUCH read_wf
-  call print_cas_coefs
   call set_generators_bitmasks_as_holes_and_particles
+  if (.True.) then
+    integer :: i,j
+    do j=1,N_states
+      do i=1,N_det
+        psi_coef(i,j) = CI_eigenvectors(i,j)
+      enddo
+    enddo
+    TOUCH psi_coef 
+  endif
+  call print_cas_coefs
+  
   call run(N_states,energy)
   if(do_pt2_end)then
     call run_pt2(N_states,energy)
diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f
index e3a2d1f5..1b2e2fcb 100644
--- a/plugins/mrcepa0/mrcepa0_general.irp.f
+++ b/plugins/mrcepa0/mrcepa0_general.irp.f
@@ -17,12 +17,11 @@ subroutine run(N_st,energy)
   double precision, allocatable :: lambda(:)
   allocate (lambda(N_states))
 
-  
   thresh_mrcc = thresh_dressed_ci
   n_it_mrcc_max = n_it_max_dressed_ci
 
   if(n_it_mrcc_max == 1) then
-    do j=1,N_states_diag
+    do j=1,N_states
       do i=1,N_det
         psi_coef(i,j) = CI_eigenvectors_dressed(i,j)
       enddo
@@ -31,7 +30,6 @@ subroutine run(N_st,energy)
     call write_double(6,ci_energy_dressed(1),"Final MRCC energy")
     call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
     call save_wavefunction
-    energy(:) = ci_energy_dressed(:)
   else
     E_new = 0.d0
     delta_E = 1.d0
@@ -39,15 +37,21 @@ subroutine run(N_st,energy)
     lambda = 1.d0
     do while (delta_E > thresh_mrcc)
       iteration += 1
-      print *,  '===========================' 
-      print *,  'MRCEPA0 Iteration', iteration
-      print *,  '===========================' 
+      print *,  '===============================================' 
+      print *,  'MRCEPA0 Iteration', iteration, '/', n_it_mrcc_max
+      print *,  '===============================================' 
       print *,  ''
-      E_old = sum(ci_energy_dressed)
-      call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy")
+      E_old = sum(ci_energy_dressed(1:N_states))
+      do i=1,N_st
+        call write_double(6,ci_energy_dressed(i),"MRCEPA0 energy")
+      enddo
       call diagonalize_ci_dressed(lambda)
-      E_new = sum(ci_energy_dressed)
-      delta_E = dabs(E_new - E_old)
+      E_new = sum(ci_energy_dressed(1:N_states))
+      delta_E = (E_new - E_old)/dble(N_states)
+      print *,  ''
+      call write_double(6,thresh_mrcc,"thresh_mrcc")
+      call write_double(6,delta_E,"delta_E")
+      delta_E = dabs(delta_E)
       call save_wavefunction
       call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
       if (iteration >= n_it_mrcc_max) then
@@ -55,8 +59,8 @@ subroutine run(N_st,energy)
       endif
     enddo
     call write_double(6,ci_energy_dressed(1),"Final MRCEPA0 energy")
-    energy(:) = ci_energy_dressed(:)
   endif
+  energy(1:N_st) = ci_energy_dressed(1:N_st)
 end
 
 
diff --git a/plugins/mrcepa0/mrsc2.irp.f b/plugins/mrcepa0/mrsc2.irp.f
index d0f44a33..948b1b5c 100644
--- a/plugins/mrcepa0/mrsc2.irp.f
+++ b/plugins/mrcepa0/mrsc2.irp.f
@@ -7,8 +7,16 @@ program mrsc2
   mrmode = 2
   read_wf = .True.
   SOFT_TOUCH read_wf
-  call print_cas_coefs
   call set_generators_bitmasks_as_holes_and_particles
+  if (.True.) then
+    integer :: i,j
+    do j=1,N_states
+      do i=1,N_det
+        psi_coef(i,j) = CI_eigenvectors(i,j)
+      enddo
+    enddo
+    TOUCH psi_coef 
+  endif
   call run(N_states,energy)
   if(do_pt2_end)then
     call run_pt2(N_states,energy)
diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg
index 415e359e..7724400f 100644
--- a/src/Davidson/EZFIO.cfg
+++ b/src/Davidson/EZFIO.cfg
@@ -6,7 +6,25 @@ default: 1.e-12
 
 [n_states_diag]
 type: States_number
-doc: n_states_diag
+doc: Number of states to consider during the Davdison diagonalization
 default: 10
 interface: ezfio,provider,ocaml
 
+[davidson_sze_max]
+type: Strictly_positive_int
+doc: Number of micro-iterations before re-contracting
+default: 10
+interface: ezfio,provider,ocaml
+
+[state_following]
+type: logical
+doc: If true, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[disk_based_davidson]
+type: logical
+doc: If true, disk space is used to store the vectors
+default: False
+interface: ezfio,provider,ocaml
+
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index fddac471..2402f973 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -45,7 +45,11 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
   !$OMP END DO 
   !$OMP END PARALLEL
 
-  call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  if (disk_based_davidson) then
+    call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  else
+    call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  endif
   do i=1,N_st_diag
     s2_out(i) = S2_jj(i)
   enddo
@@ -83,8 +87,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint
   integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
   double precision,  intent(in)  :: H_jj(sze)
-  double precision,  intent(inout)  :: S2_jj(sze)
-  integer,  intent(in)  :: iunit
+  double precision,  intent(inout) :: S2_jj(sze)
+  integer,  intent(in)           :: iunit
   double precision, intent(inout) :: u_in(dim_in,N_st_diag)
   double precision, intent(out)  :: energies(N_st_diag)
   
@@ -98,7 +102,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   integer                        :: k_pairs, kl
   
   integer                        :: iter2
-  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:)
+  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:), overlap(:,:)
   double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
   double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
   double precision               :: diag_h_mat_elem
@@ -107,17 +111,19 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   double precision               :: to_print(3,N_st)
   double precision               :: cpu, wall
   integer                        :: shift, shift2, itermax
+  double precision               :: r1, r2
+  logical                        :: state_ok(N_st_diag*davidson_sze_max)
   include 'constants.include.F'
   
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
   if (N_st_diag*3 > sze) then
-     print *,  'error in Davidson :'
-     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
-     stop -1
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+    stop -1
   endif
-
+  
   PROVIDE nuclear_repulsion expected_s2
-
+  
   call write_time(iunit)
   call wall_time(wall)
   call cpu_time(cpu)
@@ -136,7 +142,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = ' Iter'
   do i=1,N_st
-    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual'
+    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual  '
   enddo
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = '===== '
@@ -144,31 +150,32 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
     write_buffer = trim(write_buffer)//' ================ =========== ==========='
   enddo
   write(iunit,'(A)') trim(write_buffer)
-
-  integer, external :: align_double
+  
+  integer, external              :: align_double
   sze_8 = align_double(sze)
-
-  itermax = min(davidson_sze_max, sze/N_st_diag)
+  
+  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
   allocate(                                                          &
-      W(sze_8,N_st_diag*itermax),                           &
-      U(sze_8,N_st_diag*itermax),                           &
-      S(sze_8,N_st_diag*itermax),                           &
-      h(N_st_diag*itermax,N_st_diag*itermax),      &
-      y(N_st_diag*itermax,N_st_diag*itermax),      &
-      s_(N_st_diag*itermax,N_st_diag*itermax),     &
-      s_tmp(N_st_diag*itermax,N_st_diag*itermax),  &
+      W(sze_8,N_st_diag*itermax),                                    &
+      U(sze_8,N_st_diag*itermax),                                    &
+      S(sze_8,N_st_diag*itermax),                                    &
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
       residual_norm(N_st_diag),                                      &
-      c(N_st_diag*itermax),                                 &
-      s2(N_st_diag*itermax),                                &
+      c(N_st_diag*itermax),                                          &
+      s2(N_st_diag*itermax),                                         &
+      overlap(N_st_diag*itermax, N_st_diag*itermax),                 &
       lambda(N_st_diag*itermax))
   
-  h  = 0.d0
-  s_ = 0.d0
-  s_tmp = 0.d0
+  h = 0.d0
   U = 0.d0
   W = 0.d0
   S = 0.d0
   y = 0.d0
+  s_ = 0.d0
+  s_tmp = 0.d0
 
 
   ASSERT (N_st > 0)
@@ -182,28 +189,21 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   
   converged = .False.
   
-  double precision               :: r1, r2
-  do k=N_st+1,N_st_diag-2,2
-      do i=1,sze
-        call random_number(r1)
-        call random_number(r2)
-        r1 = dsqrt(-2.d0*dlog(r1))
-        r2 = dtwo_pi*r2
-        u_in(i,k) = r1*dcos(r2)
-        u_in(i,k+1) = r1*dsin(r2)
-      enddo
+  do k=N_st+1,N_st_diag
+    u_in(k,k) = 10.d0
+    do i=1,sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
   enddo
-  do k=N_st_diag-1,N_st_diag
-      do i=1,sze
-        call random_number(r1)
-        call random_number(r2)
-        r1 = dsqrt(-2.d0*dlog(r1))
-        r2 = dtwo_pi*r2
-        u_in(i,k) = r1*dcos(r2)
-      enddo
+  do k=1,N_st_diag
+    call normalize(u_in(1,k),sze)
   enddo
-
-
+  
+  
   do while (.not.converged)
     
     do k=1,N_st_diag
@@ -211,12 +211,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
         U(i,k) = u_in(i,k)
       enddo
     enddo
-
+    
     do iter=1,itermax-1
       
       shift  = N_st_diag*(iter-1)
       shift2 = N_st_diag*iter
-
+      
       call ortho_qr(U,size(U,1),sze,shift2)
 
       ! Compute |W_k> = \sum_i |i><i|H|u_k>
@@ -239,8 +239,49 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
           0.d0, s_, size(s_,1))
 
 
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
+      
       ! Diagonalize h
       ! -------------
+
       call lapack_diag(lambda,y,h,size(h,1),shift2)
       
       ! Compute S2 for each eigenvector
@@ -261,24 +302,70 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
       enddo
 
       if (s2_eig) then
-        logical :: state_ok(N_st_diag*davidson_sze_max)
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        state_ok(k) = .True.
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+
+        overlap = -1.d0
         do k=1,shift2
-          state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
         enddo
-        do k=1,shift2
-          if (.not. state_ok(k)) then
-            do l=k+1,shift2
-              if (state_ok(l)) then
-                call dswap(shift2, y(1,k), 1, y(1,l), 1)
-                call dswap(1, s2(k), 1, s2(l), 1)
-                call dswap(1, lambda(k), 1, lambda(l), 1)
-                state_ok(k) = .True.
-                state_ok(l) = .False.
-                exit
-              endif
-            enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
           endif
         enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
       endif
 
 
@@ -296,11 +383,31 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
       ! -----------------------------------------
       
       do k=1,N_st_diag
-        do i=1,sze
-          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
-             )/max(H_jj(i) - lambda (k),1.d-2)
-        enddo
+        if (state_ok(k)) then
+          do i=1,sze
+            U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+                * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+              )/max(H_jj(i) - lambda (k),1.d-2)
+          enddo
+        else
+          ! Randomize components with bad <S2>
+            do i=1,sze-2,2
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+              U(i+1,shift2+k) = r1*dsin(r2)
+            enddo
+            do i=sze-2+1,sze
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+            enddo
+        endif
+
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
           to_print(1,k) = lambda(k) + nuclear_repulsion
@@ -345,7 +452,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
 
   deallocate (                                                       &
       W, residual_norm,                                              &
-      U,                                                             &
+      U, overlap,                                                    &
       c, S,                                                          &
       h,                                                             &
       y, s_, s_tmp,                                                  &
@@ -353,3 +460,439 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
       )
 end
 
+subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  use bitmasks
+  use mmap_module
+  implicit none
+  BEGIN_DOC
+  ! Davidson diagonalization with specific diagonal elements of the H matrix
+  !
+  ! H_jj : specific diagonal H matrix elements to diagonalize de Davidson
+  !
+  ! S2_jj : specific diagonal S^2 matrix elements 
+  !
+  ! dets_in : bitmasks corresponding to determinants
+  !
+  ! u_in : guess coefficients on the various states. Overwritten
+  !   on exit
+  !
+  ! dim_in : leftmost dimension of u_in
+  !
+  ! sze : Number of determinants
+  !
+  ! N_st : Number of eigenstates
+  ! 
+  ! N_st_diag : Number of states in which H is diagonalized. Assumed > sze
+  !
+  ! iunit : Unit for the I/O
+  !
+  ! Initial guess vectors are not necessarily orthonormal
+  END_DOC
+  integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint
+  integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
+  double precision,  intent(in)  :: H_jj(sze)
+  double precision,  intent(inout) :: S2_jj(sze)
+  integer,  intent(in)           :: iunit
+  double precision, intent(inout) :: u_in(dim_in,N_st_diag)
+  double precision, intent(out)  :: energies(N_st_diag)
+  
+  integer                        :: sze_8
+  integer                        :: iter
+  integer                        :: i,j,k,l,m
+  logical                        :: converged
+  
+  double precision               :: u_dot_v, u_dot_u
+  
+  integer                        :: k_pairs, kl
+  
+  integer                        :: iter2
+  double precision, pointer      :: W(:,:),  U(:,:), S(:,:), overlap(:,:)
+  double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
+  double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
+  double precision               :: diag_h_mat_elem
+  double precision, allocatable  :: residual_norm(:)
+  character*(16384)              :: write_buffer
+  double precision               :: to_print(3,N_st)
+  double precision               :: cpu, wall
+  logical                        :: state_ok(N_st_diag*davidson_sze_max)
+  integer                        :: shift, shift2, itermax
+  include 'constants.include.F'
+  
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
+  if (N_st_diag*3 > sze) then
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+    stop -1
+  endif
+  
+  PROVIDE nuclear_repulsion expected_s2
+  
+  call write_time(iunit)
+  call wall_time(wall)
+  call cpu_time(cpu)
+  write(iunit,'(A)') ''
+  write(iunit,'(A)') 'Davidson Diagonalization'
+  write(iunit,'(A)') '------------------------'
+  write(iunit,'(A)') ''
+  call write_int(iunit,N_st,'Number of states')
+  call write_int(iunit,N_st_diag,'Number of states in diagonalization')
+  call write_int(iunit,sze,'Number of determinants')
+  write(iunit,'(A)') ''
+  write_buffer = '===== '
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(iunit,'(A)') trim(write_buffer)
+  write_buffer = ' Iter'
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual  '
+  enddo
+  write(iunit,'(A)') trim(write_buffer)
+  write_buffer = '===== '
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(iunit,'(A)') trim(write_buffer)
+  
+  integer, external              :: align_double
+  integer                        :: fd(3)
+  type(c_ptr)                    :: c_pointer(3)
+  sze_8 = align_double(sze)
+  
+  itermax = min(davidson_sze_max, sze/N_st_diag)
+  
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'U',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
+      8, fd(1), .False., c_pointer(1))
+  call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) )
+  
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'W',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
+      8, fd(2), .False., c_pointer(2))
+  call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) )
+  
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'S',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
+      8, fd(3), .False., c_pointer(3))
+  call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) )
+  
+  allocate(                                                          &
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      overlap(N_st_diag*itermax, N_st_diag*itermax),                 &
+      residual_norm(N_st_diag),                                      &
+      c(N_st_diag*itermax),                                          &
+      s2(N_st_diag*itermax),                                         &
+      lambda(N_st_diag*itermax))
+  
+  h = 0.d0
+  U = 0.d0
+  W = 0.d0
+  S = 0.d0
+  y = 0.d0
+  s_ = 0.d0
+  s_tmp = 0.d0
+  
+  
+  ASSERT (N_st > 0)
+  ASSERT (N_st_diag >= N_st)
+  ASSERT (sze > 0)
+  ASSERT (Nint > 0)
+  ASSERT (Nint == N_int)
+  
+  ! Davidson iterations
+  ! ===================
+  
+  converged = .False.
+  
+  double precision               :: r1, r2
+  do k=N_st+1,N_st_diag
+      u_in(k,k) = 10.d0
+      do i=1,sze
+        call random_number(r1)
+        r1 = dsqrt(-2.d0*dlog(r1))
+        r2 = dtwo_pi*r2
+        u_in(i,k) = r1*dcos(r2)
+      enddo
+  enddo
+  do k=1,N_st_diag
+    call normalize(u_in(1,k),sze)
+  enddo
+
+
+  do while (.not.converged)
+    
+    do k=1,N_st_diag
+      do i=1,sze
+        U(i,k) = u_in(i,k)
+      enddo
+    enddo
+
+    do iter=1,itermax-1
+      
+      shift  = N_st_diag*(iter-1)
+      shift2 = N_st_diag*iter
+
+      call ortho_qr(U,size(U,1),sze,shift2)
+
+      ! Compute |W_k> = \sum_i |i><i|H|u_k>
+      ! -----------------------------------------
+      
+       
+!      call H_S2_u_0_nstates_zmq(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
+      call H_S2_u_0_nstates(W(1,shift+1),S(1,shift+1),U(1,shift+1),H_jj,S2_jj,sze,dets_in,Nint,N_st_diag,sze_8)
+      
+      
+      ! Compute h_kl = <u_k | W_l> = <u_k| H |u_l>
+      ! -------------------------------------------
+
+      do k=1,iter
+        shift  = N_st_diag*(k-1)
+        call dgemm('T','N', N_st_diag, shift2, sze,                       &
+            1.d0, U(1,shift+1), size(U,1), W, size(W,1),                          &
+            0.d0, h(shift+1,1), size(h,1))
+        
+        call dgemm('T','N', N_st_diag, shift2, sze,                       &
+            1.d0, U(1,shift+1), size(U,1), S, size(S,1),                          &
+            0.d0, s_(shift+1,1), size(s_,1))
+      enddo
+
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
+
+      ! Diagonalize h
+      ! -------------
+      call lapack_diag(lambda,y,h,size(h,1),shift2)
+      
+      ! Compute S2 for each eigenvector
+      ! -------------------------------
+
+      call dgemm('N','N',shift2,shift2,shift2,                       &
+          1.d0, s_, size(s_,1), y, size(y,1),                        &
+          0.d0, s_tmp, size(s_tmp,1))
+      
+      call dgemm('T','N',shift2,shift2,shift2,                       &
+          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+          0.d0, s_, size(s_,1))
+
+
+      
+      do k=1,shift2
+        s2(k) = s_(k,k) + S_z2_Sz
+      enddo
+
+
+      if (s2_eig) then
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        state_ok(k) = .True.
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        ! Compute overlap with U_in
+        ! -------------------------
+        
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+        overlap = -1.d0
+        do k=1,shift2
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,shift2
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,shift2
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
+      endif
+
+
+      ! Express eigenvectors of h in the determinant basis
+      ! --------------------------------------------------
+      
+      call dgemm('N','N', sze, N_st_diag, shift2,                    &
+          1.d0, U, size(U,1), y, size(y,1), 0.d0, U(1,shift2+1), size(U,1))
+      call dgemm('N','N', sze, N_st_diag, shift2,                    &
+          1.d0, W, size(W,1), y, size(y,1), 0.d0, W(1,shift2+1), size(W,1))
+      call dgemm('N','N', sze, N_st_diag, shift2,                    &
+          1.d0, S, size(S,1), y, size(y,1), 0.d0, S(1,shift2+1), size(S,1))
+
+      ! Compute residual vector and davidson step
+      ! -----------------------------------------
+      
+      do k=1,N_st_diag
+        if (state_ok(k)) then
+          do i=1,sze
+            U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+                * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+              )/max(H_jj(i) - lambda (k),1.d-2)
+          enddo
+        else
+          ! Randomize components with bad <S2>
+            do i=1,sze-2,2
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+              U(i+1,shift2+k) = r1*dsin(r2)
+            enddo
+            do i=sze-2+1,sze
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+            enddo
+        endif
+
+        if (k <= N_st) then
+          residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
+          to_print(1,k) = lambda(k) + nuclear_repulsion
+          to_print(2,k) = s2(k)
+          to_print(3,k) = residual_norm(k)
+        endif
+      enddo
+      
+      write(iunit,'(X,I3,X,100(X,F16.10,X,F11.6,X,E11.3))')  iter, to_print(1:3,1:N_st)
+      call davidson_converged(lambda,residual_norm,wall,iter,cpu,N_st,converged)
+      do k=1,N_st
+        if (residual_norm(k) > 1.e8) then
+        print *,  ''
+          stop 'Davidson failed'
+        endif
+      enddo
+      if (converged) then
+        exit
+      endif
+      
+    enddo
+
+    ! Re-contract to u_in
+    ! -----------
+    
+    call dgemm('N','N', sze, N_st_diag, shift2, 1.d0,      &
+        U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+
+  enddo
+
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
+    S2_jj(k) = s2(k)
+  enddo
+  write_buffer = '===== '
+  do i=1,N_st
+    write_buffer = trim(write_buffer)//' ================ =========== ==========='
+  enddo
+  write(iunit,'(A)') trim(write_buffer)
+  write(iunit,'(A)') ''
+  call write_time(iunit)
+
+  call munmap(                                                        &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+      8, fd(1), c_pointer(1))
+  
+  call munmap(                                                        &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+      8, fd(2), c_pointer(2))
+  
+  call munmap(                                                        &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+      8, fd(3), c_pointer(3))
+
+  deallocate (                                                       &
+      residual_norm,                                                 &
+      c, overlap,                                                    &
+      h,                                                             &
+      y, s_, s_tmp,                                                  &
+      lambda                                                         &
+      )
+end
+
diff --git a/src/Davidson/parameters.irp.f b/src/Davidson/parameters.irp.f
index 82315495..ae8babaa 100644
--- a/src/Davidson/parameters.irp.f
+++ b/src/Davidson/parameters.irp.f
@@ -1,21 +1,3 @@
-BEGIN_PROVIDER [ integer, davidson_iter_max ]
-  implicit none
-  BEGIN_DOC
-  ! Max number of Davidson iterations
-  END_DOC
-  davidson_iter_max = 100
-END_PROVIDER
-
-BEGIN_PROVIDER [ integer, davidson_sze_max ]
-  implicit none
-  BEGIN_DOC
-  ! Max number of Davidson sizes
-  END_DOC
-  ASSERT (davidson_sze_max <= davidson_iter_max)
-  davidson_sze_max = N_states+7
-END_PROVIDER
-
-
 BEGIN_PROVIDER [ character(64), davidson_criterion ]
  implicit none
  BEGIN_DOC
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index e34ba3ce..dd5ab1ab 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -422,7 +422,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
 
   !$OMP CRITICAL (u0Hu0)
   do istate=1,N_st
-    do i=n,1,-1
+    do i=1,n
       v_0(i,istate) = v_0(i,istate) + vt(istate,i)
       s_0(i,istate) = s_0(i,istate) + st(istate,i)
     enddo
diff --git a/src/Utils/LinearAlgebra.irp.f b/src/Utils/LinearAlgebra.irp.f
index 9c3b35b5..44a15ddf 100644
--- a/src/Utils/LinearAlgebra.irp.f
+++ b/src/Utils/LinearAlgebra.irp.f
@@ -148,17 +148,42 @@ subroutine ortho_qr(A,LDA,m,n)
 
   allocate (jpvt(n), tau(n), work(1))
   LWORK=-1
-!  call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO)
   call  dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
-  LWORK=WORK(1)
+  LWORK=2*WORK(1)
   deallocate(WORK)
   allocate(WORK(LWORK))
-!  call dgeqp3(m, n, A, LDA, jpvt, tau, WORK, LWORK, INFO)
   call  dgeqrf( m, n, A, LDA, TAU, WORK, LWORK, INFO )
   call dorgqr(m, n, n, A, LDA, tau, WORK, LWORK, INFO)
   deallocate(WORK,jpvt,tau)
 end
 
+subroutine ortho_qr_unblocked(A,LDA,m,n)
+  implicit none
+  BEGIN_DOC
+  ! Orthogonalization using Q.R factorization
+  !
+  ! A : matrix to orthogonalize
+  !
+  ! LDA : leftmost dimension of A
+  !
+  ! n : Number of rows of A
+  !
+  ! m : Number of columns of A
+  !
+  END_DOC
+  integer, intent(in)            :: m,n, LDA
+  double precision, intent(inout) :: A(LDA,n)
+
+  integer                        :: info
+  integer, allocatable           :: jpvt(:)
+  double precision, allocatable  :: tau(:), work(:)
+
+  allocate (jpvt(n), tau(n), work(n))
+  call  dgeqr2( m, n, A, LDA, TAU, WORK, INFO )
+  call dorg2r(m, n, n, A, LDA, tau, WORK, INFO)
+  deallocate(WORK,jpvt,tau)
+end
+
 subroutine ortho_lowdin(overlap,LDA,N,C,LDC,m)
   implicit none
   BEGIN_DOC
@@ -444,7 +469,12 @@ subroutine lapack_diag(eigvalues,eigvectors,H,nmax,n)
     print *, irp_here, ': DSYEV: the ',-info,'-th argument had an illegal value'
     stop 2
   else if( info > 0  ) then
-     write(*,*)'DSYEV Failed'
+     write(*,*)'DSYEV Failed : ', info
+     do i=1,n
+      do j=1,n
+        print *,  H(i,j)
+      enddo
+     enddo
      stop 1
   end if
 
diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90
index 4a83582f..80260233 100644
--- a/src/Utils/map_module.f90
+++ b/src/Utils/map_module.f90
@@ -622,7 +622,7 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in)
     
     istep = ishft(iend-ibegin,-1)
     idx = ibegin + istep
-    do while (istep > 16)
+    do while (istep > 64)
       idx = ibegin + istep
       ! TODO : Cache misses 
       if (cache_key < X(idx)) then
@@ -660,8 +660,8 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in)
       endif
     enddo
     idx = ibegin
-    if (min(iend_in,sze) > ibegin+16) then
-      iend = ibegin+16
+    if (min(iend_in,sze) > ibegin+64) then
+      iend = ibegin+64
       do while (cache_key > X(idx))
         idx = idx+1
       end do
@@ -730,7 +730,7 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
     
     istep = ishft(iend-ibegin,-1)
     idx = ibegin + istep
-    do while (istep > 16)
+    do while (istep > 64)
       idx = ibegin + istep
       if (cache_key < X(idx)) then
         iend = idx
@@ -771,8 +771,8 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
     enddo
     idx = ibegin
     value = Y(idx)
-    if (min(iend_in,sze) > ibegin+16) then
-      iend = ibegin+16
+    if (min(iend_in,sze) > ibegin+64) then
+      iend = ibegin+64
       do while (cache_key > X(idx))
         idx = idx+1
         value = Y(idx)
diff --git a/tests/bats/cassd.bats b/tests/bats/cassd.bats
index 44b44ee6..f43ffaaa 100644
--- a/tests/bats/cassd.bats
+++ b/tests/bats/cassd.bats
@@ -5,13 +5,22 @@ source $QP_ROOT/tests/bats/common.bats.sh
 @test "CAS_SD H2O cc-pVDZ" {
   test_exe cassd_zmq || skip
   INPUT=h2o.ezfio
+  rm -rf work/h2o.ezfio/determinants/
   qp_edit -c $INPUT
   ezfio set_file $INPUT
-  ezfio set perturbation do_pt2_end False
-  ezfio set determinants n_det_max 2000
+  ezfio set perturbation do_pt2_end True 
+  ezfio set determinants n_det_max 16384
   qp_set_mo_class $INPUT -core "[1]" -inact "[2,5]" -act "[3,4,6,7]" -virt "[8-24]" 
   qp_run cassd_zmq $INPUT  
+  energy="$(ezfio get cas_sd_zmq energy_pt2)"
+  eq $energy -76.23109  2.E-5
+
+  ezfio set determinants n_det_max 2048 
+  ezfio set determinants read_wf True
+  ezfio set perturbation do_pt2_end True 
+  qp_run cassd_zmq $INPUT  
+  ezfio set determinants read_wf False
   energy="$(ezfio get cas_sd_zmq energy)"
-  eq $energy -76.2221842108163 1.E-5
+  eq $energy -76.2300888408526   2.E-5
 }
 
diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats
index 8b56c606..ed69681f 100644
--- a/tests/bats/mrcepa0.bats
+++ b/tests/bats/mrcepa0.bats
@@ -15,8 +15,8 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set mrcepa0 lambda_type 1
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
-  energy="$(ezfio get mrcepa0 energy)"
-  eq $energy -76.22903276183061 1.e-4
+  energy="$(ezfio get mrcepa0 energy_pt2)"
+  eq $energy -76.238562120457431 1.e-4
 }
 
 @test "MRCC H2O cc-pVDZ" {
@@ -32,8 +32,8 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set mrcepa0 lambda_type 0
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
-  energy="$(ezfio get mrcepa0 energy)"
-  eq $energy -76.22899302846875 1.e-4
+  energy="$(ezfio get mrcepa0 energy_pt2)"
+  eq $energy -76.238527498388962 1.e-4
 }
 
 @test "MRSC2 H2O cc-pVDZ" {
@@ -48,8 +48,8 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set mrcepa0 lambda_type 0
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
-  energy="$(ezfio get mrcepa0 energy)"
-  eq $energy -76.22647345292708 1.e-4
+  energy="$(ezfio get mrcepa0 energy_pt2)"
+  eq $energy -76.235833732594187 1.e-4
 }
 
 @test "MRCEPA0 H2O cc-pVDZ" {
@@ -64,7 +64,7 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set mrcepa0 lambda_type 0
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
-  energy="$(ezfio get mrcepa0 energy)"
-  eq $energy -76.23199784430074 1.e-4
+  energy="$(ezfio get mrcepa0 energy_pt2)"
+  eq $energy -76.2418799284763 1.e-4
 }
 
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index 4664ce82..9e560d38 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -14,7 +14,7 @@ mrcepa0.bats
 
 
 export QP_PREFIX="timeout -s 9 300"
-export QP_TASK_DEBUG=1
+#export QP_TASK_DEBUG=1
 
 rm -rf work output