diff --git a/.travis.yml b/.travis.yml
index 57991ba3..22cd358e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,12 +9,12 @@ sudo: false
 addons:
   apt:
     packages:
-    - zlib1g-dev
-    - libgmp3-dev
     - gfortran
     - gcc
     - liblapack-dev
     - graphviz
+#    - zlib1g-dev
+#    - libgmp3-dev
 
 cache:
   directories:
@@ -29,4 +29,4 @@ script:
   - source ./quantum_package.rc ; qp_module.py install Full_CI Full_CI_ZMQ Hartree_Fock CAS_SD_ZMQ mrcepa0 All_singles
   - source ./quantum_package.rc ; ninja
   - source ./quantum_package.rc ; cd ocaml ; make ; cd -
-  - source ./quantum_package.rc ; cd tests ; ./run_tests.sh #-v
+  - source ./quantum_package.rc ; cd tests ; ./run_tests.sh -v
diff --git a/configure b/configure
index 8cb02608..128d7126 100755
--- a/configure
+++ b/configure
@@ -71,8 +71,8 @@ d_dependency = {
     "emsl": ["python"],
     "gcc": [],
     "g++": [],
-    "zeromq" : [ "g++" ],
-    "f77zmq" : [ "zeromq", "python" ],
+    "zeromq" : [ "g++", "make" ],
+    "f77zmq" : [ "zeromq", "python", "make" ],
     "python": [],
     "ninja": ["g++", "python"],
     "make": [],
@@ -150,7 +150,6 @@ f77zmq = Info(
     url='{head}/zeromq/f77_zmq/{tail}'.format(**path_github),
     description=' F77-ZeroMQ',
     default_path=join(QP_ROOT_LIB, "libf77zmq.a") )
-#                join(QP_ROOT, "src", "ZMQ", "f77zmq.h") )
 
 p_graphviz = Info(
     url='https://github.com/xflr6/graphviz/archive/master.tar.gz',
@@ -166,7 +165,7 @@ d_info = dict()
 
 for m in ["ocaml", "m4", "curl", "zlib", "patch", "irpf90", "docopt",
           "resultsFile", "ninja", "emsl", "ezfio", "p_graphviz",
-          "zeromq", "f77zmq","bats" ]:
+          "zeromq", "f77zmq", "bats"]:
     exec ("d_info['{0}']={0}".format(m))
 
 
@@ -494,7 +493,9 @@ def create_ninja_and_rc(l_installed):
         'export PYTHONPATH="${QP_EZFIO}/Python":"${QP_PYTHON}":"${PYTHONPATH}"',
         'export PATH="${QP_PYTHON}":"${QP_ROOT}"/bin:"${QP_ROOT}"/ocaml:"${PATH}"',
         'export LD_LIBRARY_PATH="${QP_ROOT}"/lib:"${LD_LIBRARY_PATH}"',
-        'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"', "",
+        'export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"',
+        'export C_INCLUDE_PATH="${QP_ROOT}"/include:"${C_INCLUDE_PATH}"',
+        '',
         'source ${QP_ROOT}/install/EZFIO/Bash/ezfio.sh', "",
         'source ${HOME}/.opam/opam-init/init.sh > /dev/null 2> /dev/null || true',
         '',
diff --git a/include/.empty b/include/.empty
new file mode 100644
index 00000000..e69de29b
diff --git a/install/scripts/build.sh b/install/scripts/build.sh
index 79a71065..5071b5aa 100755
--- a/install/scripts/build.sh
+++ b/install/scripts/build.sh
@@ -4,7 +4,16 @@
 BUILD=_build/${TARGET}
 rm -rf -- ${BUILD}
 mkdir ${BUILD} || exit 1
-tar -zxf Downloads/${TARGET}.tar.gz --strip-components=1 --directory=${BUILD} || exit 1
+# Unpack the downloaded archive, whichever compression it uses.
+if [[ -f Downloads/${TARGET}.tar.gz ]] ; then
+  tar -zxf Downloads/${TARGET}.tar.gz --strip-components=1 --directory=${BUILD} || exit 1
+elif [[ -f Downloads/${TARGET}.tar.bz2 ]] ; then
+  tar -jxf Downloads/${TARGET}.tar.bz2 --strip-components=1 --directory=${BUILD} || exit 1
+else
+  # No archive at all: abort instead of running _install on an empty build directory.
+  echo "Error: no Downloads/${TARGET}.tar.gz or Downloads/${TARGET}.tar.bz2 found" >&2
+  exit 1
+fi
 _install || exit 1
 rm -rf -- ${BUILD} _build/${TARGET}.log
 exit 0
diff --git a/install/scripts/install_curl.sh b/install/scripts/install_curl.sh
index c3a48024..6194a0e0 100755
--- a/install/scripts/install_curl.sh
+++ b/install/scripts/install_curl.sh
@@ -10,10 +10,4 @@ function _install()
   mv curl.ermine ${QP_ROOT}/bin/curl || return 1
 }
 
-BUILD=_build/${TARGET}
-rm -rf -- ${BUILD}
-mkdir ${BUILD} || exit 1
-tar -xvjf Downloads/${TARGET}.tar.bz2 --strip-components=1 --directory=${BUILD} || exit 1
-_install || exit 1
-rm -rf -- ${BUILD} _build/${TARGET}.log
-exit 0
\ No newline at end of file
+source scripts/build.sh
diff --git a/install/scripts/install_f77zmq.sh b/install/scripts/install_f77zmq.sh
index 8357857c..92388337 100755
--- a/install/scripts/install_f77zmq.sh
+++ b/install/scripts/install_f77zmq.sh
@@ -7,10 +7,9 @@ function _install()
   cd ..
   QP_ROOT=$PWD
   cd -
-  export C_INCLUDE_PATH="${C_INCLUDE_PATH}":"${QP_ROOT}"/lib
   set -e
   set -u
-  export ZMQ_H="${QP_ROOT}"/lib/zmq.h
+  export ZMQ_H="${QP_ROOT}"/include/zmq.h
   cd "${BUILD}"
   make -j 8                        || exit 1
   mv libf77zmq.a "${QP_ROOT}"/lib  || exit 1
diff --git a/install/scripts/install_gmp.sh b/install/scripts/install_gmp.sh
new file mode 100755
index 00000000..9aea2973
--- /dev/null
+++ b/install/scripts/install_gmp.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -x
+
+TARGET=gmp
+
+function _install()
+{
+  rm -rf -- ${TARGET}
+  mkdir ${TARGET} || exit 1
+  cd ..
+  QP_ROOT=$PWD
+  cd -
+  cd ${BUILD}
+  ./configure --prefix=$QP_ROOT && make -j 8 || exit 1
+  make install || exit 1
+}
+
+source scripts/build.sh
diff --git a/install/scripts/install_m4.sh b/install/scripts/install_m4.sh
index ca62a025..5a52d757 100755
--- a/install/scripts/install_m4.sh
+++ b/install/scripts/install_m4.sh
@@ -8,8 +8,9 @@ function _install()
   QP_ROOT=$PWD
   cd -
   cd ${BUILD}
-  ./configure && make || exit 1 
-  ln -sf ${PWD}/src/m4 ${QP_ROOT}/bin || exit 1 
+  ./configure --prefix=$QP_ROOT && make || exit 1
+  # Install under ${QP_ROOT}: the symlink into the build tree is gone, and build.sh deletes that tree after _install.
+  make install || exit 1
 }
 
 source scripts/build.sh
diff --git a/install/scripts/install_ocaml.sh b/install/scripts/install_ocaml.sh
index 913ae75d..b82216d3 100755
--- a/install/scripts/install_ocaml.sh
+++ b/install/scripts/install_ocaml.sh
@@ -5,11 +5,11 @@ QP_ROOT=$PWD
 cd -
 
 # Normal installation
-PACKAGES="core cryptokit zarith ocamlfind sexplib ZMQ"
+PACKAGES="core cryptokit.1.10 ocamlfind sexplib ZMQ"
 #ppx_sexp_conv
 
 # Needed for ZeroMQ
-export C_INCLUDE_PATH="${QP_ROOT}"/lib:"${C_INCLUDE_PATH}"
+export C_INCLUDE_PATH="${QP_ROOT}"/include:"${C_INCLUDE_PATH}"
 export LIBRARY_PATH="${QP_ROOT}"/lib:"${LIBRARY_PATH}"
 export LD_LIBRARY_PATH="${QP_ROOT}"/lib:"${LD_LIBRARY_PATH}"
 
diff --git a/install/scripts/install_patch.sh b/install/scripts/install_patch.sh
index 10522401..224ac8f8 100755
--- a/install/scripts/install_patch.sh
+++ b/install/scripts/install_patch.sh
@@ -9,11 +9,11 @@ function _install()
   QP_ROOT=$PWD
   cd -
   cd ${BUILD}
-  ./configure --prefix=${QP_ROOT}/install/${TARGET} && make || exit 1
+  ./configure --prefix=${QP_ROOT} && make || exit 1
   make install || exit 1
   cd -
-  cp ${TARGET}/bin/${TARGET} ${QP_ROOT}/bin || exit 1
+  # With --prefix=${QP_ROOT}, 'make install' already places the binary under ${QP_ROOT}/bin; nothing to copy.
   rm -R -- ${TARGET} || exit 1
 }
 
-source scripts/build.sh
\ No newline at end of file
+source scripts/build.sh
diff --git a/install/scripts/install_zeromq.sh b/install/scripts/install_zeromq.sh
index 3bf2a715..f6596f9c 100755
--- a/install/scripts/install_zeromq.sh
+++ b/install/scripts/install_zeromq.sh
@@ -7,22 +7,13 @@ function _install()
   cd ..
   QP_ROOT=$PWD
   cd -
-  export C_INCLUDE_PATH="${C_INCLUDE_PATH}":./
   set -e
   set -u
   ORIG=$(pwd)
   cd "${BUILD}"
-  ./configure --without-libsodium || exit 1
+  ./configure --prefix=$QP_ROOT --without-libsodium || exit 1
   make -j 8 || exit 1
-  rm -f -- "${QP_ROOT}"/lib/libzmq.a "${QP_ROOT}"/lib/libzmq.so "${QP_ROOT}"/lib/libzmq.so.?
-  cp .libs/libzmq.a  "${QP_ROOT}"/lib 
-  cp .libs/libzmq.so "${QP_ROOT}"/lib/libzmq.so.5
-# cp src/.libs/libzmq.a  "${QP_ROOT}"/lib 
-# cp src/.libs/libzmq.so "${QP_ROOT}"/lib/libzmq.so.4
-  cp include/{zmq.h,zmq_utils.h} "${QP_ROOT}"/lib
-  cd "${QP_ROOT}"/lib
-  ln -s libzmq.so.5 libzmq.so
-# ln -s libzmq.so.4 libzmq.so
+  make install || exit 1
   cd ${ORIG}
   return 0
 }
diff --git a/install/scripts/install_zlib.sh b/install/scripts/install_zlib.sh
index 06ce67f3..ea268f2e 100755
--- a/install/scripts/install_zlib.sh
+++ b/install/scripts/install_zlib.sh
@@ -11,11 +11,8 @@ function _install()
   cd -
   cd ${BUILD}
-  ./configure && make || exit 1
-  make install prefix=$QP_ROOT/install/${TARGET} || exit 1
-  ln -s -f $QP_ROOT/install/${TARGET}/lib/libz.so $QP_ROOT/lib || exit 1
-  ln -s -f $QP_ROOT/install/${TARGET}/lib/libz.a $QP_ROOT/lib || exit 1
-  ln -s -f $QP_ROOT/install/${TARGET}/include/zlib.h $QP_ROOT/lib || exit 1
-  ln -s -f $QP_ROOT/install/${TARGET}/include/zconf.h $QP_ROOT/lib || exit 1
+  # Configure once, directly with the final prefix, so 'make install' writes under ${QP_ROOT}.
+  ./configure --prefix=$QP_ROOT && make || exit 1
+  make install || exit 1
 }
 
 source scripts/build.sh
diff --git a/ocaml/Pseudo.ml b/ocaml/Pseudo.ml
index 3fb4736e..7f813937 100644
--- a/ocaml/Pseudo.ml
+++ b/ocaml/Pseudo.ml
@@ -124,23 +124,27 @@ let to_string t =
 let find in_channel element =
   In_channel.seek in_channel 0L;
   
-  let element_read, old_pos =
-     ref Element.X, 
+  let loop, element_read, old_pos =
+     ref true,
+     ref None, 
      ref (In_channel.pos in_channel)
   in
-  while !element_read <> element
+
+  while !loop
   do
-    let buffer = 
-      old_pos := In_channel.pos in_channel;
-      match In_channel.input_line in_channel with
-      | Some line -> String.split ~on:' ' line
-         |> List.hd_exn
-      | None -> ""
-    in
     try
-      element_read := Element.of_string buffer
+      let buffer = 
+        old_pos := In_channel.pos in_channel;
+        match In_channel.input_line in_channel with
+        | Some line -> String.split ~on:' ' line
+          |> List.hd_exn
+        | None -> raise End_of_file
+      in
+      element_read := Some (Element.of_string buffer);
+      loop := !element_read <> (Some element) 
     with
     | Element.ElementError _ -> ()
+    | End_of_file -> element_read := None; loop := false  (* not found: do not report the last element parsed *)
   done ;
   In_channel.seek in_channel !old_pos;
   !element_read
@@ -148,124 +152,126 @@ let find in_channel element =
 
 (** Read the Pseudopotential in GAMESS format *)
 let read_element in_channel element =
-  ignore (find in_channel element);
-
-  let rec read result =
-    match In_channel.input_line in_channel with
-    | None -> result
-    | Some line ->
-      if (String.strip line = "") then
-        result
-      else
-        read (line::result)
-  in
-
-  let data = 
-    read []
-    |> List.rev
-  in
-
-  let debug_data = 
-    String.concat ~sep:"\n" data
-  in
-
-  let decode_first_line = function
-  | first_line :: rest ->
+  match find in_channel element with
+  | Some e when e = element -> 
     begin
-      let first_line_split = 
-        String.split first_line ~on:' '
-        |> List.filter ~f:(fun x -> (String.strip x) <> "")
+      let rec read result =
+        match In_channel.input_line in_channel with
+        | None -> result
+        | Some line ->
+          if (String.strip line = "") then
+            result
+          else
+            read (line::result)
       in
-      match first_line_split with
-      | e :: "GEN" :: n :: p ->
-        {  element = Element.of_string e ;
-           n_elec  = Int.of_string n |> Positive_int.of_int ;
-           local = [] ;
-           non_local = []
-        }, rest
-      | _ -> failwith (
-        Printf.sprintf "Unable to read Pseudopotential : \n%s\n"
-        debug_data )
-    end
-  | _ -> failwith ("Error reading pseudopotential\n"^debug_data)
-  in  
 
-  let rec loop create_primitive accu = function
-  | (0,rest) -> List.rev accu, rest
-  | (n,line::rest) -> 
-    begin
-      match
-        String.split line ~on:' '
-        |> List.filter ~f:(fun x -> String.strip x <> "")
-      with
-      | c :: i :: e :: [] ->
-        let i =
-          Int.of_string i
-        in
-        let elem =
-          ( create_primitive 
-            (Float.of_string e |> AO_expo.of_float)
-            (i-2 |> R_power.of_int),
-            Float.of_string c |> AO_coef.of_float 
-          )
-        in
-        loop create_primitive (elem::accu) (n-1, rest)
+      let data = 
+        read []
+        |> List.rev
+      in
+
+      let debug_data = 
+        String.concat ~sep:"\n" data
+      in
+
+      let decode_first_line = function
+      | first_line :: rest ->
+        begin
+          let first_line_split = 
+            String.split first_line ~on:' '
+            |> List.filter ~f:(fun x -> (String.strip x) <> "")
+          in
+          match first_line_split with
+          | e :: "GEN" :: n :: p ->
+            {  element = Element.of_string e ;
+              n_elec  = Int.of_string n |> Positive_int.of_int ;
+              local = [] ;
+              non_local = []
+            }, rest
+          | _ -> failwith (
+            Printf.sprintf "Unable to read Pseudopotential : \n%s\n"
+            debug_data )
+        end
       | _ -> failwith ("Error reading pseudopotential\n"^debug_data)
-    end
-  | _ -> failwith ("Error reading pseudopotential\n"^debug_data)
-  in
+      in  
 
-  let decode_local (pseudo,data) = 
-    let decode_local_n n rest =
-      let result, rest =
-        loop Primitive_local.of_expo_r_power [] (Positive_int.to_int n,rest)
+      let rec loop create_primitive accu = function
+      | (0,rest) -> List.rev accu, rest
+      | (n,line::rest) -> 
+        begin
+          match
+            String.split line ~on:' '
+            |> List.filter ~f:(fun x -> String.strip x <> "")
+          with
+          | c :: i :: e :: [] ->
+            let i =
+              Int.of_string i
+            in
+            let elem =
+              ( create_primitive 
+                (Float.of_string e |> AO_expo.of_float)
+                (i-2 |> R_power.of_int),
+                Float.of_string c |> AO_coef.of_float 
+              )
+            in
+            loop create_primitive (elem::accu) (n-1, rest)
+          | _ -> failwith ("Error reading pseudopotential\n"^debug_data)
+        end
+      | _ -> failwith ("Error reading pseudopotential\n"^debug_data)
       in
-      { pseudo with local = result }, rest
-    in
-    match data with
-    | n :: rest ->
-        let n =
-          String.strip n
-          |> Int.of_string
-          |> Positive_int.of_int
+
+      let decode_local (pseudo,data) = 
+        let decode_local_n n rest =
+          let result, rest =
+            loop Primitive_local.of_expo_r_power [] (Positive_int.to_int n,rest)
+          in
+          { pseudo with local = result }, rest
         in
-        decode_local_n n rest
-    | _ -> failwith ("Unable to read (non-)local pseudopotential\n"^debug_data)
-  in
-
-  let decode_non_local (pseudo,data) = 
-    let decode_non_local_n proj n (pseudo,data) =
-      let result, rest = 
-        loop (Primitive_non_local.of_proj_expo_r_power proj)
-          [] (Positive_int.to_int n, data)
+        match data with
+        | n :: rest ->
+            let n =
+              String.strip n
+              |> Int.of_string
+              |> Positive_int.of_int
+            in
+            decode_local_n n rest
+        | _ -> failwith ("Unable to read (non-)local pseudopotential\n"^debug_data)
       in
-      { pseudo with non_local = pseudo.non_local @ result }, rest
-    in
-    let rec new_proj (pseudo,data) proj = 
-      match data with
-      | n :: rest ->
-          let n =
-            String.strip n
-            |> Int.of_string
-            |> Positive_int.of_int
-          in
-          let result = 
-            decode_non_local_n proj n (pseudo,rest)
-          and proj_next =
-            (Positive_int.to_int proj)+1
-            |> Positive_int.of_int
-          in
-          new_proj result proj_next
-      | _ -> pseudo
-    in
-    new_proj (pseudo,data) (Positive_int.of_int 0)
-  in
 
-  decode_first_line data
-  |> decode_local
-  |> decode_non_local 
+      let decode_non_local (pseudo,data) = 
+        let decode_non_local_n proj n (pseudo,data) =
+          let result, rest = 
+            loop (Primitive_non_local.of_proj_expo_r_power proj)
+              [] (Positive_int.to_int n, data)
+          in
+          { pseudo with non_local = pseudo.non_local @ result }, rest
+        in
+        let rec new_proj (pseudo,data) proj = 
+          match data with
+          | n :: rest ->
+              let n =
+                String.strip n
+                |> Int.of_string
+                |> Positive_int.of_int
+              in
+              let result = 
+                decode_non_local_n proj n (pseudo,rest)
+              and proj_next =
+                (Positive_int.to_int proj)+1
+                |> Positive_int.of_int
+              in
+              new_proj result proj_next
+          | _ -> pseudo
+        in
+        new_proj (pseudo,data) (Positive_int.of_int 0)
+      in
+
+      decode_first_line data
+      |> decode_local
+      |> decode_non_local 
+    end
+  | _ -> empty element
 
-     
  
 
 include To_md5
diff --git a/ocaml/qp_create_guess.ml b/ocaml/qp_create_guess.ml
index 62af57de..bebfdad3 100644
--- a/ocaml/qp_create_guess.ml
+++ b/ocaml/qp_create_guess.ml
@@ -88,8 +88,9 @@ let run ~multiplicity ezfio_file =
       ~alpha:(Elec_alpha_number.of_int alpha_new)
       ~beta:(Elec_beta_number.of_int beta_new) pair )
   in
+
   let c = 
-    Array.create ~len:(List.length determinants) (Det_coef.of_float 1.)
+    Array.init (List.length determinants) ~f:(fun _ -> Det_coef.of_float ((Random.float 2.)-.1.))
   in
  
   determinants 
diff --git a/plugins/All_singles/.gitignore b/plugins/All_singles/.gitignore
deleted file mode 100644
index cae0c971..00000000
--- a/plugins/All_singles/.gitignore
+++ /dev/null
@@ -1,32 +0,0 @@
-# Automatically created by $QP_ROOT/scripts/module/module_handler.py 
-.ninja_deps
-.ninja_log
-AO_Basis
-Bitmask
-Davidson
-Determinants
-Electrons
-Ezfio_files
-Generators_restart
-Hartree_Fock
-IRPF90_man
-IRPF90_temp
-Integrals_Bielec
-Integrals_Monoelec
-MOGuess
-MO_Basis
-Makefile
-Makefile.depend
-Nuclei
-Perturbation
-Properties
-Pseudo
-Selectors_no_sorted
-Utils
-ZMQ
-all_1h_1p
-all_singles
-ezfio_interface.irp.f
-irpf90.make
-irpf90_entities
-tags
\ No newline at end of file
diff --git a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
index 6844ed90..881f74c3 100644
--- a/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
+++ b/plugins/CAS_SD_ZMQ/cassd_zmq.irp.f
@@ -5,11 +5,15 @@ program fci_zmq
   
   double precision, allocatable  :: pt2(:)
   integer                        :: degree
+  double precision               :: threshold_davidson_in
   
   allocate (pt2(N_states))
   
   pt2 = 1.d0
-  diag_algorithm = "Lapack"
+  threshold_davidson_in = threshold_davidson
+  threshold_davidson = threshold_davidson_in * 100.d0
+  SOFT_TOUCH threshold_davidson
+
   
   if (N_det > N_det_max) then
     call diagonalize_CI
@@ -33,20 +37,11 @@ program fci_zmq
   double precision               :: E_CI_before(N_states)
   
   
-  integer                        :: n_det_before
+  integer                        :: n_det_before, to_select
   print*,'Beginning the selection ...'
   E_CI_before(1:N_states) = CI_energy(1:N_states)
   
   do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) )
-    n_det_before = N_det
-    call ZMQ_selection(max(256-N_det, N_det), pt2)
-    
-    PROVIDE  psi_coef
-    PROVIDE  psi_det
-    PROVIDE  psi_det_sorted
-
-    call diagonalize_CI
-    call save_wavefunction
     
     print *,  'N_det          = ', N_det
     print *,  'N_states       = ', N_states
@@ -71,12 +66,38 @@ program fci_zmq
     endif
     E_CI_before(1:N_states) = CI_energy(1:N_states)
     call ezfio_set_cas_sd_zmq_energy(CI_energy(1))
+    
+    n_det_before = N_det
+    to_select = 2*N_det
+    to_select = max(64-to_select, to_select)
+    to_select = min(to_select,N_det_max-n_det_before)
+    call ZMQ_selection(to_select, pt2)
+    
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted
+    
+    if (N_det == N_det_max) then
+      threshold_davidson = threshold_davidson_in
+      SOFT_TOUCH threshold_davidson
+    endif
+    call diagonalize_CI
+    call save_wavefunction
+    call ezfio_set_cas_sd_zmq_energy(CI_energy(1))
   enddo
+  
+  if (N_det < N_det_max) then
+    threshold_davidson = threshold_davidson_in
+    SOFT_TOUCH threshold_davidson
+    call diagonalize_CI
+    call save_wavefunction
+    call ezfio_set_cas_sd_zmq_energy(CI_energy(1))
+  endif
 
   integer :: exc_max, degree_min
   exc_max = 0
   print *,  'CAS determinants : ', N_det_cas
-  do i=1,min(N_det_cas,10)
+  do i=1,min(N_det_cas,20)
     do k=i,N_det_cas
       call get_excitation_degree(psi_cas(1,1,k),psi_cas(1,1,i),degree,N_int)
       exc_max = max(exc_max,degree)
@@ -108,7 +129,7 @@ program fci_zmq
   endif
   call save_wavefunction
   call ezfio_set_cas_sd_zmq_energy(CI_energy(1))
-  call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before+pt2)
+  call ezfio_set_cas_sd_zmq_energy_pt2(E_CI_before(1)+pt2(1))
 
 end
 
diff --git a/plugins/Full_CI_ZMQ/.gitignore b/plugins/Full_CI_ZMQ/.gitignore
deleted file mode 100644
index a996a508..00000000
--- a/plugins/Full_CI_ZMQ/.gitignore
+++ /dev/null
@@ -1,34 +0,0 @@
-# Automatically created by $QP_ROOT/scripts/module/module_handler.py 
-.ninja_deps
-.ninja_log
-AO_Basis
-Bitmask
-Davidson
-Determinants
-Electrons
-Ezfio_files
-Full_CI
-Generators_full
-Hartree_Fock
-IRPF90_man
-IRPF90_temp
-Integrals_Bielec
-Integrals_Monoelec
-MOGuess
-MO_Basis
-Makefile
-Makefile.depend
-Nuclei
-Perturbation
-Properties
-Pseudo
-Selectors_full
-Utils
-ZMQ
-ezfio_interface.irp.f
-fci_zmq
-irpf90.make
-irpf90_entities
-selection_davidson_slave
-selection_slave
-tags
\ No newline at end of file
diff --git a/plugins/Full_CI_ZMQ/fci_zmq.irp.f b/plugins/Full_CI_ZMQ/fci_zmq.irp.f
index c80b7410..ae0d7989 100644
--- a/plugins/Full_CI_ZMQ/fci_zmq.irp.f
+++ b/plugins/Full_CI_ZMQ/fci_zmq.irp.f
@@ -5,11 +5,15 @@ program fci_zmq
   
   double precision, allocatable  :: pt2(:)
   integer                        :: degree
+  integer                        :: n_det_before, to_select
+  double precision               :: threshold_davidson_in
   
   allocate (pt2(N_states))
   
   pt2 = 1.d0
-  diag_algorithm = "Lapack"
+  threshold_davidson_in = threshold_davidson
+  threshold_davidson = threshold_davidson_in * 100.d0
+  SOFT_TOUCH threshold_davidson
   
   if (N_det > N_det_max) then
     call diagonalize_CI
@@ -33,29 +37,11 @@ program fci_zmq
   double precision               :: E_CI_before(N_states)
   
   
-  integer                        :: n_det_before
   print*,'Beginning the selection ...'
   E_CI_before(1:N_states) = CI_energy(1:N_states)
+  n_det_before = 0
   
   do while ( (N_det < N_det_max) .and. (maxval(abs(pt2(1:N_states))) > pt2_max) )
-    n_det_before = N_det
-    call ZMQ_selection(max(1024-N_det, N_det), pt2)
-    
-    PROVIDE  psi_coef
-    PROVIDE  psi_det
-    PROVIDE  psi_det_sorted
-
-    call diagonalize_CI
-    call save_wavefunction
-    
-!    if (N_det > N_det_max) then
-!        psi_det = psi_det_sorted
-!        psi_coef = psi_coef_sorted
-!        N_det = N_det_max
-!        soft_touch N_det psi_det psi_coef
-!        call diagonalize_CI
-!        call save_wavefunction
-!    endif
     
     print *,  'N_det          = ', N_det
     print *,  'N_states       = ', N_states
@@ -79,9 +65,35 @@ program fci_zmq
       enddo
     endif
     E_CI_before(1:N_states) = CI_energy(1:N_states)
-    call ezfio_set_full_ci_zmq_energy(CI_energy)
+    call ezfio_set_full_ci_zmq_energy(CI_energy(1))
+
+    n_det_before = N_det
+    to_select = 2*N_det
+    to_select = max(64-to_select, to_select)
+    to_select = min(to_select, N_det_max-n_det_before)
+    call ZMQ_selection(to_select, pt2)
+    
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted
+
+    if (N_det == N_det_max) then
+      threshold_davidson = threshold_davidson_in
+      SOFT_TOUCH threshold_davidson
+    endif
+    call diagonalize_CI
+    call save_wavefunction
+    call ezfio_set_full_ci_zmq_energy(CI_energy(1))
   enddo
 
+  if (N_det < N_det_max) then
+      threshold_davidson = threshold_davidson_in
+      SOFT_TOUCH threshold_davidson
+      call diagonalize_CI
+      call save_wavefunction
+      call ezfio_set_full_ci_zmq_energy(CI_energy(1))
+  endif
+
   if(do_pt2_end)then
     print*,'Last iteration only to compute the PT2'
     threshold_selectors = max(threshold_selectors,threshold_selectors_pt2)
@@ -99,9 +111,11 @@ program fci_zmq
       print *,  'E+PT2    = ', E_CI_before+pt2
       print *,  '-----'
     enddo
-    call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before+pt2)
+    call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1))
   endif
   call save_wavefunction
+  call ezfio_set_full_ci_zmq_energy(CI_energy(1))
+  call ezfio_set_full_ci_zmq_energy_pt2(E_CI_before(1)+pt2(1))
 end
 
 
diff --git a/plugins/MP2/mp2.irp.f b/plugins/MP2/mp2.irp.f
index 3a049f7b..d4721c71 100644
--- a/plugins/MP2/mp2.irp.f
+++ b/plugins/MP2/mp2.irp.f
@@ -1,4 +1,10 @@
 program mp2
+  no_vvvv_integrals = .True.
+  SOFT_TOUCH no_vvvv_integrals
+  call run
+end
+
+subroutine run
   implicit none
   double precision, allocatable  :: pt2(:), norm_pert(:)
   double precision               :: H_pert_diag, E_old
diff --git a/plugins/MP2/mp2_wf.irp.f b/plugins/MP2/mp2_wf.irp.f
index 5efbb9cd..e7419319 100644
--- a/plugins/MP2/mp2_wf.irp.f
+++ b/plugins/MP2/mp2_wf.irp.f
@@ -1,4 +1,10 @@
 program mp2_wf
+  no_vvvv_integrals = .True.
+  SOFT_TOUCH no_vvvv_integrals
+  call run
+end
+
+subroutine run
   implicit none
   BEGIN_DOC
 ! Save the MP2 wave function
diff --git a/plugins/MRCC_Utils/amplitudes.irp.f b/plugins/MRCC_Utils/amplitudes.irp.f
index 82736b8f..0e6a4cf4 100644
--- a/plugins/MRCC_Utils/amplitudes.irp.f
+++ b/plugins/MRCC_Utils/amplitudes.irp.f
@@ -66,9 +66,18 @@
 
 END_PROVIDER
 
+BEGIN_PROVIDER [ integer, n_exc_active_sze ]
+ implicit none
+ BEGIN_DOC
+ ! Dimension of arrays to avoid zero-sized arrays
+ END_DOC
+ n_exc_active_sze = max(n_exc_active,1)
+END_PROVIDER
 
- BEGIN_PROVIDER [ integer, active_excitation_to_determinants_idx, (0:N_det_ref+1, n_exc_active) ]
-&BEGIN_PROVIDER [ double precision, active_excitation_to_determinants_val, (N_states,N_det_ref+1, n_exc_active) ]
+
+
+ BEGIN_PROVIDER [ integer, active_excitation_to_determinants_idx, (0:N_det_ref+1, n_exc_active_sze) ]
+&BEGIN_PROVIDER [ double precision, active_excitation_to_determinants_val, (N_states,N_det_ref+1, n_exc_active_sze) ]
  implicit none
  BEGIN_DOC
  ! Sparse matrix A containing the matrix to transform the active excitations to
@@ -89,7 +98,7 @@ END_PROVIDER
      !$OMP shared(is_active_exc, active_hh_idx, active_pp_idx, n_exc_active)&
      !$OMP private(lref, pp, II, ok, myMask, myDet, ind, phase, wk, ppp, hh, s)
  allocate(lref(N_det_non_ref))
- !$OMP DO schedule(static,10)
+ !$OMP DO schedule(dynamic)
  do ppp=1,n_exc_active
    active_excitation_to_determinants_val(:,:,ppp) = 0d0
    active_excitation_to_determinants_idx(:,ppp)   = 0
@@ -136,10 +145,10 @@ END_PROVIDER
 END_PROVIDER
 
 
- BEGIN_PROVIDER [ integer, mrcc_AtA_ind, (N_det_ref * n_exc_active) ]
-&BEGIN_PROVIDER [ double precision, mrcc_AtA_val, (N_states, N_det_ref * n_exc_active) ]
-&BEGIN_PROVIDER [ integer, mrcc_col_shortcut, (n_exc_active) ]
-&BEGIN_PROVIDER [ integer, mrcc_N_col, (n_exc_active) ]
+ BEGIN_PROVIDER [ integer, mrcc_AtA_ind, (N_det_ref * n_exc_active_sze) ]
+&BEGIN_PROVIDER [ double precision, mrcc_AtA_val, (N_states, N_det_ref * n_exc_active_sze) ]
+&BEGIN_PROVIDER [ integer, mrcc_col_shortcut, (n_exc_active_sze) ]
+&BEGIN_PROVIDER [ integer, mrcc_N_col, (n_exc_active_sze) ]
  implicit none
  BEGIN_DOC
  ! A is active_excitation_to_determinants in At.A
@@ -170,7 +179,6 @@ END_PROVIDER
   do at_roww = 1, n_exc_active ! hh_nex
     at_row = active_pp_idx(at_roww)
     wk = 0
-    if(mod(at_roww, 100) == 0) print *, "AtA", at_row, "/", hh_nex
 
     do a_coll = 1, n_exc_active
       a_col = active_pp_idx(a_coll)
@@ -224,7 +232,7 @@ END_PROVIDER
   deallocate (A_ind_mwen, A_val_mwen, As2_val_mwen, t)
   !$OMP END PARALLEL
 
-  print *, "ATA SIZE", ata_size
+  print *, "At.A SIZE", ata_size
 
 END_PROVIDER
 
diff --git a/plugins/MRCC_Utils/davidson.irp.f b/plugins/MRCC_Utils/davidson.irp.f
index 0470960a..6bdadb24 100644
--- a/plugins/MRCC_Utils/davidson.irp.f
+++ b/plugins/MRCC_Utils/davidson.irp.f
@@ -640,8 +640,10 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   include 'constants.include.F'
   
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
-  if (N_st_diag > sze) then
-     stop 'error in Davidson : N_st_diag > sze'
+  if (N_st_diag*3 > sze) then
+     print *,  'error in Davidson :'
+     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+     stop -1
   endif
 
   PROVIDE nuclear_repulsion
@@ -666,7 +668,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = ' Iter'
   do i=1,N_st
-    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual'
+    write_buffer = trim(write_buffer)//'      Energy          S^2       Residual '
   enddo
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = '===== '
@@ -715,6 +717,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   
   double precision               :: r1, r2
   do k=N_st+1,N_st_diag
+      u_in(k,k) = 10.d0
       do i=1,sze
         call random_number(r1)
         call random_number(r2)
@@ -762,10 +765,49 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
           1.d0, U, size(U,1), S, size(S,1),                &
           0.d0, s_, size(s_,1))
 
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
+      
       ! Diagonalize h
       ! -------------
       call lapack_diag(lambda,y,h,size(h,1),shift2)
-      
+
       ! Compute S2 for each eigenvector
       ! -------------------------------
 
@@ -781,61 +823,77 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
         s2(k) = s_(k,k) + S_z2_Sz
       enddo
 
-    if (s2_eig) then
-        logical :: state_ok(N_st_diag*davidson_sze_max)
-        do k=1,shift2
-          state_ok(k) = (dabs(s2(k)-expected_s2) < 1.d0)
-        enddo
-        do k=1,shift2
-          if (.not. state_ok(k)) then
-            do l=k+1,shift2
-              if (state_ok(l)) then
-                call dswap(shift2, y(1,k), 1, y(1,l), 1)
-                call dswap(1, s2(k), 1, s2(l), 1)
-                call dswap(1, lambda(k), 1, lambda(l), 1)
-                state_ok(k) = .True.
-                state_ok(l) = .False.
-                exit
-              endif
-            enddo
-          endif
+      if (s2_eig) then
+          logical :: state_ok(N_st_diag*davidson_sze_max)
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        do k=1,size(state_ok)
+          state_ok(k) = .True.
         enddo
       endif
 
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
 
-!      ! Compute overlap with U_in
-!      ! -------------------------
-!      
-!      integer :: coord(2), order(N_st_diag)
-!      overlap = -1.d0
-!      do k=1,shift2 
-!        do i=1,shift2
-!          overlap(k,i) = dabs(y(k,i))
-!        enddo
-!      enddo
-!      do k=1,N_st
-!        coord = maxloc(overlap)
-!        order( coord(2) )  = coord(1)
-!        overlap(coord(1),coord(2)) = -1.d0
-!      enddo
-!      overlap = y
-!      do k=1,N_st
-!       l = order(k)
-!       if (k /= l) then
-!         y(1:shift2,k) = overlap(1:shift2,l)
-!       endif
-!      enddo
-!      do k=1,N_st
-!        overlap(k,1) = lambda(k)
-!        overlap(k,2) = s2(k)
-!      enddo
-!      do k=1,N_st
-!       l = order(k)
-!       if (k /= l) then
-!         lambda(k) = overlap(l,1)
-!         s2(k) = overlap(l,2)
-!       endif
-!      enddo
+      if (state_following) then
+
+        ! Compute overlap with U_in
+        ! -------------------------
+        
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+        overlap = -1.d0
+        do k=1,shift2
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,shift2
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
+          endif
+        enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
+      endif
 
 
       ! Express eigenvectors of h in the determinant basis
@@ -852,11 +910,31 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
       ! -----------------------
 
       do k=1,N_st_diag
-        do i=1,sze
-          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
-             )/max(H_jj(i) - lambda (k),1.d-2)
-        enddo
+!        if (state_ok(k)) then
+          do i=1,sze
+            U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+                * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+              )/max(H_jj(i) - lambda (k),1.d-2)
+          enddo
+!        else
+!          ! Randomize components with bad <S2>
+!            do i=1,sze-2,2
+!              call random_number(r1)
+!              call random_number(r2)
+!              r1 = dsqrt(-2.d0*dlog(r1))
+!              r2 = dtwo_pi*r2
+!              U(i,shift2+k) = r1*dcos(r2)
+!              U(i+1,shift2+k) = r1*dsin(r2)
+!            enddo
+!            do i=sze-2+1,sze
+!              call random_number(r1)
+!              call random_number(r2)
+!              r1 = dsqrt(-2.d0*dlog(r1))
+!              r2 = dtwo_pi*r2
+!              U(i,shift2+k) = r1*dcos(r2)
+!            enddo
+!        endif
+
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
           to_print(1,k) = lambda(k) + nuclear_repulsion
@@ -879,20 +957,16 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 
     enddo
 
-    if (.not.converged) then
-      iter = itermax-1
-    endif
-
     ! Re-contract to u_in
     ! -----------
     
-    do k=1,N_st_diag
-      energies(k) = lambda(k)
-    enddo
+    call dgemm('N','N', sze, N_st_diag, shift2,                    &
+        1.d0, U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
 
-    call dgemm('N','N', sze, N_st_diag, N_st_diag*iter, 1.d0,      &
-        U, size(U,1), y, size(y,1), 0.d0, u_in, size(u_in,1))
+  enddo
 
+  do k=1,N_st_diag
+    energies(k) = lambda(k)
   enddo
 
   write_buffer = '===== '
@@ -905,7 +979,7 @@ subroutine davidson_diag_hjj_sjj_mrcc(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 
   deallocate (                                                       &
       W, residual_norm,                                              &
-      U,                                                             &
+      U, overlap,                                                    &
       c, S,                                                       &
       h,                                                             &
       y, s_, s_tmp,                                                  &
@@ -968,10 +1042,11 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
   call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
   call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
 
+  PROVIDE delta_ij_s2 
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
       !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8, &
-      !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij,istate_in)
+      !$OMP  N_det_ref, idx_ref, N_det_non_ref, idx_non_ref, delta_ij, delta_ij_s2,istate_in)
   allocate(vt(N_st_8,n),st(N_st_8,n))
   Vt = 0.d0
   St = 0.d0
@@ -1056,6 +1131,8 @@ subroutine H_S2_u_0_mrcc_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,istate_i
         do istate=1,N_st
           vt (istate,i) = vt (istate,i) + delta_ij(istate_in,jj,ii)*ut(istate,j)
           vt (istate,j) = vt (istate,j) + delta_ij(istate_in,jj,ii)*ut(istate,i)
+          st (istate,i) = st (istate,i) + delta_ij_s2(istate_in,jj,ii)*ut(istate,j)
+          st (istate,j) = st (istate,j) + delta_ij_s2(istate_in,jj,ii)*ut(istate,i)
         enddo
     enddo
   enddo
diff --git a/plugins/MRCC_Utils/mrcc_utils.irp.f b/plugins/MRCC_Utils/mrcc_utils.irp.f
index f28ccf25..d6b9cc79 100644
--- a/plugins/MRCC_Utils/mrcc_utils.irp.f
+++ b/plugins/MRCC_Utils/mrcc_utils.irp.f
@@ -33,6 +33,7 @@ END_PROVIDER
       if (ihpsi_current(k) == 0.d0) then
         ihpsi_current(k) = 1.d-32
       endif
+!      lambda_mrcc(k,i) = psi_non_ref_coef(i,k)/ihpsi_current(k) 
       lambda_mrcc(k,i) = min(-1.d-32,psi_non_ref_coef(i,k)/ihpsi_current(k) )
       lambda_pert = 1.d0 / (psi_ref_energy_diagonalized(k)-hii)
       if (lambda_pert / lambda_mrcc(k,i)  < 0.5d0) then
@@ -77,19 +78,6 @@ BEGIN_PROVIDER [ double precision, hij_mrcc, (N_det_non_ref,N_det_ref) ]
 
 END_PROVIDER
 
-! BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
-!&BEGIN_PROVIDER [ double precision, delta_ii, (N_states,N_det_ref) ]
-! implicit none
-! BEGIN_DOC
-! ! Dressing matrix in N_det basis
-! END_DOC
-! integer :: i,j,m
-! delta_ij = 0.d0
-! delta_ii = 0.d0
-! call H_apply_mrcc(delta_ij,delta_ii,N_states,N_det_non_ref,N_det_ref)
-!
-!END_PROVIDER
-             
 
 BEGIN_PROVIDER [ double precision, h_matrix_dressed, (N_det,N_det,N_states) ]
  implicit none
@@ -149,8 +137,13 @@ END_PROVIDER
      
      allocate (eigenvectors(size(CI_eigenvectors_dressed,1),size(CI_eigenvectors_dressed,2)), &
      eigenvalues(size(CI_electronic_energy_dressed,1)))
+     do j=1,min(N_states,N_det)
+       do i=1,N_det
+         eigenvectors(i,j) = psi_coef(i,j)
+       enddo
+     enddo
      do mrcc_state=1,N_states
-      do j=1,min(N_states,N_det)
+      do j=mrcc_state,min(N_states,N_det)
         do i=1,N_det
           eigenvectors(i,j) = psi_coef(i,j)
         enddo
@@ -161,17 +154,15 @@ END_PROVIDER
             output_determinants,mrcc_state)
       CI_eigenvectors_dressed(1:N_det,mrcc_state) = eigenvectors(1:N_det,mrcc_state)
       CI_electronic_energy_dressed(mrcc_state) = eigenvalues(mrcc_state)
-      if (mrcc_state == 1) then
-        do k=N_states+1,N_states_diag
-          CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k)
-          CI_electronic_energy_dressed(k) = eigenvalues(k)
-        enddo
-      endif
-     enddo
-     call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,&
+   enddo
+   do k=N_states+1,N_states_diag
+     CI_eigenvectors_dressed(1:N_det,k) = eigenvectors(1:N_det,k)
+     CI_electronic_energy_dressed(k) = eigenvalues(k)
+   enddo
+   call u_0_S2_u_0(CI_eigenvectors_s2_dressed,CI_eigenvectors_dressed,N_det,psi_det,N_int,&
           N_states_diag,size(CI_eigenvectors_dressed,1))
-     deallocate (eigenvectors,eigenvalues)
 
+   deallocate (eigenvectors,eigenvalues)
      
    else if (diag_algorithm == "Lapack") then
      
@@ -649,14 +640,12 @@ END_PROVIDER
   allocate(rho_mrcc_init(N_det_non_ref))
   allocate(x_new(hh_nex))
   allocate(x(hh_nex), AtB(hh_nex))
-  x = 0d0
-        
 
   do s=1,N_states
 
     AtB(:) = 0.d0
     !$OMP PARALLEL default(none) shared(k, psi_non_ref_coef, active_excitation_to_determinants_idx,&
-        !$OMP   active_excitation_to_determinants_val, x, N_det_ref, hh_nex, N_det_non_ref)          &
+        !$OMP   active_excitation_to_determinants_val, N_det_ref, hh_nex, N_det_non_ref)          &
         !$OMP private(at_row, a_col, i, j, r1, r2, wk, A_ind_mwen, A_val_mwen, a_coll, at_roww)&
         !$OMP shared(N_states,mrcc_col_shortcut, mrcc_N_col, AtB, mrcc_AtA_val, mrcc_AtA_ind, s, n_exc_active, active_pp_idx)
     
@@ -712,21 +701,19 @@ END_PROVIDER
     end do
     deallocate(lref)
 
+    do i=1,N_det_non_ref
+      rho_mrcc(i,s) = rho_mrcc_init(i) 
+    enddo
+
     x_new = x
     
     double precision               :: factor, resold
     factor = 1.d0
     resold = huge(1.d0)
 
-    do k=0,100000
-      !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll)
-      
-      !$OMP DO
-      do i=1,N_det_non_ref
-        rho_mrcc(i,s) = rho_mrcc_init(i) 
-      enddo
-      !$OMP END DO NOWAIT
-      
+    do k=0,10*hh_nex
+      res = 0.d0
+      !$OMP PARALLEL default(shared) private(cx, i, a_col, a_coll) reduction(+:res)
       !$OMP DO
       do a_coll = 1, n_exc_active
         a_col = active_pp_idx(a_coll)
@@ -735,35 +722,38 @@ END_PROVIDER
           cx = cx + x(mrcc_AtA_ind(i)) * mrcc_AtA_val(s,i)
         end do
         x_new(a_col) = AtB(a_col) + cx * factor
-      end do
-      !$OMP END DO
-
-      !$OMP END PARALLEL
-      
-      
-      res = 0.d0
-      do a_coll=1,n_exc_active
-        a_col = active_pp_idx(a_coll)
-        do j=1,N_det_non_ref
-          i = active_excitation_to_determinants_idx(j,a_coll)
-          if (i==0) exit
-          rho_mrcc(i,s) = rho_mrcc(i,s) + active_excitation_to_determinants_val(s,j,a_coll) * X_new(a_col)
-        enddo
         res = res + (X_new(a_col) - X(a_col))*(X_new(a_col) - X(a_col))
         X(a_col) = X_new(a_col)
       end do
+      !$OMP END DO
+      !$OMP END PARALLEL
+      
       if (res > resold) then
-        factor = -factor * 0.5d0
+        factor = factor * 0.5d0
       endif
       resold = res
       
-      if(mod(k, 100) == 0) then
+      if(iand(k, 4095) == 0) then
         print *, "res ", k, res
       end if
       
-      if(res < 1d-9) exit
+      if(res < 1d-10) exit
     end do
-    
+    dIj_unique(1:size(X), s) = X(1:size(X))
+
+  enddo
+
+  do s=1,N_states
+
+    do a_coll=1,n_exc_active
+      a_col = active_pp_idx(a_coll)
+      do j=1,N_det_non_ref
+        i = active_excitation_to_determinants_idx(j,a_coll)
+        if (i==0) exit
+        rho_mrcc(i,s) = rho_mrcc(i,s) + active_excitation_to_determinants_val(s,j,a_coll) * dIj_unique(a_col,s)
+      enddo
+    end do
+
     norm = 0.d0
     do i=1,N_det_non_ref
       norm = norm + rho_mrcc(i,s)*rho_mrcc(i,s)
@@ -775,122 +765,11 @@ END_PROVIDER
     enddo
     ! Norm now contains the norm of Psi + A.X
     
-    print *, k, "res : ", res, "norm : ", sqrt(norm)
-        
-!---------------
-! double precision               :: e_0, overlap
-! double precision, allocatable  :: u_0(:)
-! integer(bit_kind), allocatable :: keys_tmp(:,:,:)
-! allocate (u_0(N_det), keys_tmp(N_int,2,N_det) )
-! k=0
-! overlap = 0.d0
-! do i=1,N_det_ref
-!   k = k+1
-!   u_0(k) = psi_ref_coef(i,1)
-!   keys_tmp(:,:,k) = psi_ref(:,:,i)
-!   overlap += u_0(k)*psi_ref_coef(i,1)
-! enddo
-! norm = 0.d0
-! do i=1,N_det_non_ref
-!   k = k+1
-!   u_0(k) = psi_non_ref_coef(i,1)
-!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
-!   overlap += u_0(k)*psi_non_ref_coef(i,1)
-! enddo
-! 
-! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
-! print *,  'Energy of |Psi_CASSD> : ', e_0 + nuclear_repulsion, overlap
-!
-! k=0
-! overlap = 0.d0
-! do i=1,N_det_ref
-!   k = k+1
-!   u_0(k) = psi_ref_coef(i,1)
-!   keys_tmp(:,:,k) = psi_ref(:,:,i)
-!   overlap += u_0(k)*psi_ref_coef(i,1)
-! enddo
-! norm = 0.d0
-! do i=1,N_det_non_ref
-!   k = k+1
-!   ! f is such that f.\tilde{c_i} = c_i
-!   f = psi_non_ref_coef(i,1) / rho_mrcc(i,1)
-!   
-!   ! Avoid numerical instabilities
-!   f = min(f,2.d0)
-!   f = max(f,-2.d0)
-!
-!   f = 1.d0
-!
-!   u_0(k) = rho_mrcc(i,1)*f
-!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
-!   norm += u_0(k)**2
-!   overlap += u_0(k)*psi_non_ref_coef(i,1)
-! enddo
-! 
-! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
-! print *,  'Energy of |(1+T)Psi_0> : ', e_0 + nuclear_repulsion, overlap
-!
-! f = 1.d0/norm
-! norm = 1.d0
-! do i=1,N_det_ref
-!  norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
-! enddo
-! f = dsqrt(f*norm)
-! overlap = norm
-! do i=1,N_det_non_ref
-!   u_0(k) = rho_mrcc(i,1)*f
-!   overlap += u_0(k)*psi_non_ref_coef(i,1)
-! enddo
-!
-! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
-! print *,  'Energy of |(1+T)Psi_0> (normalized) : ', e_0 + nuclear_repulsion,  overlap
-!
-! k=0
-! overlap = 0.d0
-! do i=1,N_det_ref
-!   k = k+1
-!   u_0(k) = psi_ref_coef(i,1)
-!   keys_tmp(:,:,k) = psi_ref(:,:,i)
-!   overlap += u_0(k)*psi_ref_coef(i,1)
-! enddo
-! norm = 0.d0
-! do i=1,N_det_non_ref
-!   k = k+1
-!   ! f is such that f.\tilde{c_i} = c_i
-!   f = psi_non_ref_coef(i,1) / rho_mrcc(i,1)
-!   
-!   ! Avoid numerical instabilities
-!   f = min(f,2.d0)
-!   f = max(f,-2.d0)
-!
-!   u_0(k) = rho_mrcc(i,1)*f
-!   keys_tmp(:,:,k) = psi_non_ref(:,:,i)
-!   norm += u_0(k)**2
-!   overlap += u_0(k)*psi_non_ref_coef(i,1)
-! enddo
-! 
-! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
-! print *,  'Energy of |(1+T)Psi_0> (mu_i): ', e_0 + nuclear_repulsion, overlap
-!
-! f = 1.d0/norm
-! norm = 1.d0
-! do i=1,N_det_ref
-!  norm = norm - psi_ref_coef(i,s)*psi_ref_coef(i,s)
-! enddo
-! overlap = norm
-! f = dsqrt(f*norm)
-! do i=1,N_det_non_ref
-!   u_0(k) = rho_mrcc(i,1)*f
-!   overlap += u_0(k)*psi_non_ref_coef(i,1)
-! enddo
-!
-! call u_0_H_u_0(e_0,u_0,N_det,keys_tmp,N_int,1,N_det)
-! print *,  'Energy of |(1+T)Psi_0> (normalized mu_i) : ', e_0 + nuclear_repulsion, overlap
-!
-! deallocate(u_0, keys_tmp)
-!
-!---------------
+    print *, "norm : ", sqrt(norm)
+   enddo
      
+        
+   do s=1,N_states
      norm = 0.d0
      double precision               :: f
      do i=1,N_det_non_ref
@@ -898,12 +777,16 @@ END_PROVIDER
          rho_mrcc(i,s) = 1.d-32
        endif
 
-       ! f is such that f.\tilde{c_i} = c_i
-       f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
+       if (lambda_type == 2) then
+         f = 1.d0
+       else
+        ! f is such that f.\tilde{c_i} = c_i
+        f = psi_non_ref_coef(i,s) / rho_mrcc(i,s)
 
-       ! Avoid numerical instabilities
-       f = min(f,2.d0)
-       f = max(f,-2.d0)
+        ! Avoid numerical instabilities
+        f = min(f,2.d0)
+        f = max(f,-2.d0)
+      endif
 
        norm = norm + f*f *rho_mrcc(i,s)*rho_mrcc(i,s)
        rho_mrcc(i,s) = f
@@ -938,7 +821,6 @@ END_PROVIDER
      ! rho_mrcc now contains the product of the scaling factors and the
      ! normalization constant
     
-    dIj_unique(1:size(X), s) = X(1:size(X))
   end do
 
 END_PROVIDER
@@ -950,17 +832,14 @@ BEGIN_PROVIDER [ double precision, dij, (N_det_ref, N_det_non_ref, N_states) ]
   integer :: s,i,j
   double precision, external :: get_dij_index
   print *, "computing amplitudes..."
-  !$OMP PARALLEL DEFAULT(shared) PRIVATE(s,i,j)
   do s=1, N_states
-    !$OMP DO
     do i=1, N_det_non_ref
       do j=1, N_det_ref
+        !DIR$ FORCEINLINE
         dij(j, i, s) = get_dij_index(j, i, s, N_int)
       end do
     end do
-    !$OMP END DO 
   end do
-  !$OMP END PARALLEL
   print *, "done computing amplitudes"
 END_PROVIDER
 
@@ -982,7 +861,7 @@ double precision function get_dij_index(II, i, s, Nint)
   else if(lambda_type == 2) then
     call get_phase(psi_ref(1,1,II), psi_non_ref(1,1,i), phase, N_int)
     get_dij_index = get_dij(psi_ref(1,1,II), psi_non_ref(1,1,i), s, Nint) * phase
-    get_dij_index = get_dij_index 
+    get_dij_index = get_dij_index * rho_mrcc(i,s) 
   end if
 end function
 
diff --git a/plugins/MRCC_Utils/multi_state.irp.f b/plugins/MRCC_Utils/multi_state.irp.f
new file mode 100644
index 00000000..b4a2a3cb
--- /dev/null
+++ b/plugins/MRCC_Utils/multi_state.irp.f
@@ -0,0 +1,101 @@
+subroutine multi_state(CI_electronic_energy_dressed_,CI_eigenvectors_dressed_,LDA)
+  implicit none
+  BEGIN_DOC
+  ! Multi-state mixing.
+  ! For each state, H_u_0_mrcc_nstates is applied to the input vectors and the
+  ! result is projected on them. These N_states x N_states matrices are
+  ! multiplied, rebuilt from an SVD with the N_states-th root of the singular
+  ! values, and diagonalized with the overlap of the input vectors as metric.
+  ! On exit the vectors are rotated and the energies set to the eigenvalues.
+  END_DOC
+  integer, intent(in)            :: LDA
+  double precision, intent(inout) :: CI_electronic_energy_dressed_(N_states)
+  double precision, intent(inout) :: CI_eigenvectors_dressed_(LDA,N_states)
+  double precision, allocatable  :: h(:,:,:), s(:,:), Psi(:,:), H_Psi(:,:,:), H_jj(:), WORK(:)
+  double precision               :: U(N_states,N_states), Vt(N_states,N_states), D(N_states)
+  double precision, external     :: diag_H_mat_elem, u_dot_v
+  integer                        :: i,j,k,istate, LWORK, INFO
+
+  allocate( h(N_states,N_states,0:N_states), s(N_states,N_states) )
+  allocate( Psi(LDA,N_states), H_Psi(LDA,N_states,0:N_states) )
+  allocate (H_jj(LDA) )
+
+  do istate=1,N_states
+    ! H_jj: diag_H_mat_elem of each determinant, plus delta_ii(istate,i) at idx_ref(i)
+    do i=1,N_det
+      H_jj(i) = diag_H_mat_elem(psi_det(1,1,i),N_int)
+    enddo
+    do i=1,N_det_ref
+      H_jj(idx_ref(i)) +=  delta_ii(istate,i)
+    enddo
+    do k=1,N_states
+      do i=1,N_det
+        Psi(i,k) = CI_eigenvectors_dressed_(i,k)
+      enddo
+    enddo
+    call H_u_0_mrcc_nstates(H_Psi(1,1,istate),Psi,H_jj,N_det,psi_det,N_int,istate,N_states,LDA)
+    ! h(:,:,istate) = Psi^T . H_Psi(:,:,istate)  (assuming u_dot_v is a dot product)
+    do k=1,N_states
+      do i=1,N_states
+        h(i,k,istate) = u_dot_v(Psi(1,i), H_Psi(1,k,istate), N_det)
+      enddo
+    enddo
+  enddo
+
+  ! Overlap matrix of the input vectors, used as metric by dsygv below
+  do k=1,N_states
+    do i=1,N_states
+      s(i,k) = u_dot_v(Psi(1,i), Psi(1,k), N_det)
+    enddo
+  enddo
+  print *,  s(:,:)
+  print *,  ''
+
+  ! h(:,:,0) = h(:,:,1) . h(:,:,2) ... h(:,:,N_states) ; U is a scratch copy
+  h(:,:,0) = h(:,:,1)
+  do istate=2,N_states
+    U(:,:) = h(:,:,0)
+    call dgemm('N','N',N_states,N_states,N_states,1.d0,&
+      U, size(U,1), h(1,1,istate), size(h,1), 0.d0, &
+      h(1,1,0), size(h,1))
+  enddo
+
+  ! Rebuild h(:,:,0) as U.D.Vt with D -> D**(1/N_states), sign forced negative
+  call svd(h(1,1,0), size(h,1), U, size(U,1), D, Vt, size(Vt,1), N_states, N_states)
+  do k=1,N_states
+    D(k) = D(k)**(1.d0/dble(N_states))
+    if (D(k) > 0.d0) D(k) = -D(k)
+  enddo
+  do j=1,N_states
+    do i=1,N_states
+      h(i,j,0) = 0.d0
+      do k=1,N_states
+        h(i,j,0) += U(i,k) * D(k) * Vt(k,j)
+      enddo
+    enddo
+  enddo
+  print *,  h(:,:,0)
+  print *,''
+
+  ! Solve h.c = e.s.c ; dsygv returns eigenvectors in h(:,:,0), eigenvalues in D
+  LWORK=3*N_states
+  allocate (WORK(LWORK))
+  call dsygv(1, 'V', 'U', N_states, h(1,1,0), size(h,1), s, size(s,1), D, WORK, LWORK, INFO)
+  deallocate(WORK)
+  if (INFO /= 0) then
+    print *, 'multi_state: dsygv failed, INFO = ', INFO
+    stop 1
+  endif
+
+  do j=1,N_states
+    do i=1,N_det
+      CI_eigenvectors_dressed_(i,j) = 0.d0
+      do k=1,N_states
+        CI_eigenvectors_dressed_(i,j) += Psi(i,k) * h(k,j,0)
+      enddo
+    enddo
+    CI_electronic_energy_dressed_(j) = D(j)
+  enddo
+  deallocate (h,s, H_jj)
+  deallocate( Psi, H_Psi )
+end
diff --git a/src/MRPT_Utils/EZFIO.cfg b/plugins/MRPT_Utils/EZFIO.cfg
similarity index 100%
rename from src/MRPT_Utils/EZFIO.cfg
rename to plugins/MRPT_Utils/EZFIO.cfg
diff --git a/src/MRPT_Utils/H_apply.irp.f b/plugins/MRPT_Utils/H_apply.irp.f
similarity index 100%
rename from src/MRPT_Utils/H_apply.irp.f
rename to plugins/MRPT_Utils/H_apply.irp.f
diff --git a/src/MRPT_Utils/NEEDED_CHILDREN_MODULES b/plugins/MRPT_Utils/NEEDED_CHILDREN_MODULES
similarity index 100%
rename from src/MRPT_Utils/NEEDED_CHILDREN_MODULES
rename to plugins/MRPT_Utils/NEEDED_CHILDREN_MODULES
diff --git a/src/MRPT_Utils/README.rst b/plugins/MRPT_Utils/README.rst
similarity index 100%
rename from src/MRPT_Utils/README.rst
rename to plugins/MRPT_Utils/README.rst
diff --git a/src/MRPT_Utils/energies_cas.irp.f b/plugins/MRPT_Utils/energies_cas.irp.f
similarity index 100%
rename from src/MRPT_Utils/energies_cas.irp.f
rename to plugins/MRPT_Utils/energies_cas.irp.f
diff --git a/src/MRPT_Utils/excitations_cas.irp.f b/plugins/MRPT_Utils/excitations_cas.irp.f
similarity index 100%
rename from src/MRPT_Utils/excitations_cas.irp.f
rename to plugins/MRPT_Utils/excitations_cas.irp.f
diff --git a/plugins/MRPT_Utils/ezfio_interface.irp.f b/plugins/MRPT_Utils/ezfio_interface.irp.f
new file mode 100644
index 00000000..6bd8931d
--- /dev/null
+++ b/plugins/MRPT_Utils/ezfio_interface.irp.f
@@ -0,0 +1,23 @@
+! DO NOT MODIFY BY HAND
+! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py
+! from file /home/scemama/quantum_package/src/MRPT_Utils/EZFIO.cfg
+
+
+BEGIN_PROVIDER [ logical, do_third_order_1h1p  ]
+  implicit none
+  BEGIN_DOC
+! If true, compute the third order contribution for the 1h1p
+! The value is read from the EZFIO file; the program stops if it is absent.
+  END_DOC
+  logical                        :: has
+  PROVIDE ezfio_filename
+  ! Check that the entry exists before reading it
+  call ezfio_has_mrpt_utils_do_third_order_1h1p(has)
+  if (has) then
+    call ezfio_get_mrpt_utils_do_third_order_1h1p(do_third_order_1h1p)
+  else
+    print *, 'mrpt_utils/do_third_order_1h1p not found in EZFIO file'
+    stop 1
+  endif
+
+END_PROVIDER
diff --git a/src/MRPT_Utils/fock_like_operators.irp.f b/plugins/MRPT_Utils/fock_like_operators.irp.f
similarity index 100%
rename from src/MRPT_Utils/fock_like_operators.irp.f
rename to plugins/MRPT_Utils/fock_like_operators.irp.f
diff --git a/src/MRPT_Utils/give_2h2p.irp.f b/plugins/MRPT_Utils/give_2h2p.irp.f
similarity index 100%
rename from src/MRPT_Utils/give_2h2p.irp.f
rename to plugins/MRPT_Utils/give_2h2p.irp.f
diff --git a/src/MRPT_Utils/mrpt_dress.irp.f b/plugins/MRPT_Utils/mrpt_dress.irp.f
similarity index 100%
rename from src/MRPT_Utils/mrpt_dress.irp.f
rename to plugins/MRPT_Utils/mrpt_dress.irp.f
diff --git a/src/MRPT_Utils/mrpt_utils.irp.f b/plugins/MRPT_Utils/mrpt_utils.irp.f
similarity index 100%
rename from src/MRPT_Utils/mrpt_utils.irp.f
rename to plugins/MRPT_Utils/mrpt_utils.irp.f
diff --git a/src/MRPT_Utils/new_way.irp.f b/plugins/MRPT_Utils/new_way.irp.f
similarity index 100%
rename from src/MRPT_Utils/new_way.irp.f
rename to plugins/MRPT_Utils/new_way.irp.f
diff --git a/src/MRPT_Utils/new_way_second_order_coef.irp.f b/plugins/MRPT_Utils/new_way_second_order_coef.irp.f
similarity index 100%
rename from src/MRPT_Utils/new_way_second_order_coef.irp.f
rename to plugins/MRPT_Utils/new_way_second_order_coef.irp.f
diff --git a/src/MRPT_Utils/psi_active_prov.irp.f b/plugins/MRPT_Utils/psi_active_prov.irp.f
similarity index 100%
rename from src/MRPT_Utils/psi_active_prov.irp.f
rename to plugins/MRPT_Utils/psi_active_prov.irp.f
diff --git a/src/MRPT_Utils/second_order_new.irp.f b/plugins/MRPT_Utils/second_order_new.irp.f
similarity index 100%
rename from src/MRPT_Utils/second_order_new.irp.f
rename to plugins/MRPT_Utils/second_order_new.irp.f
diff --git a/src/MRPT_Utils/second_order_new_2p.irp.f b/plugins/MRPT_Utils/second_order_new_2p.irp.f
similarity index 100%
rename from src/MRPT_Utils/second_order_new_2p.irp.f
rename to plugins/MRPT_Utils/second_order_new_2p.irp.f
diff --git a/src/MRPT_Utils/utils_bitmask.irp.f b/plugins/MRPT_Utils/utils_bitmask.irp.f
similarity index 100%
rename from src/MRPT_Utils/utils_bitmask.irp.f
rename to plugins/MRPT_Utils/utils_bitmask.irp.f
diff --git a/plugins/Psiref_threshold/psi_ref.irp.f b/plugins/Psiref_threshold/psi_ref.irp.f
index 5e722822..ee69ef5c 100644
--- a/plugins/Psiref_threshold/psi_ref.irp.f
+++ b/plugins/Psiref_threshold/psi_ref.irp.f
@@ -6,19 +6,22 @@ use bitmasks
 &BEGIN_PROVIDER [ integer, N_det_ref ]
   implicit none
   BEGIN_DOC
-  ! Reference wave function, defined as determinants with coefficients > 0.05
+  ! Reference wave function: determinants with |coef| > 0.05 * abs_psi_coef_max(l) for some state l
   ! idx_ref gives the indice of the ref determinant in psi_det.
   END_DOC
   integer                        :: i, k, l
   logical                        :: good
-  double precision, parameter :: threshold=0.05d0
+  double precision, parameter    :: threshold=0.05d0
+  double precision               :: t(N_states)
   N_det_ref = 0
-  t = threshold * abs_psi_coef_max
+  do l = 1, N_states
+    t(l) = threshold * abs_psi_coef_max(l)
+  enddo
   do i=1,N_det
     good = .False.
-    do l = 1, N_states
+    do l=1, N_states
      psi_ref_coef(i,l) = 0.d0
-     good = good.or.(dabs(psi_coef(i,l)) > t)
+     good = good.or.(dabs(psi_coef(i,l)) > t(l))
     enddo
     if (good) then
       N_det_ref = N_det_ref+1
diff --git a/plugins/QmcChem/e_curve_qmc.irp.f b/plugins/QmcChem/e_curve_qmc.irp.f
index 4beed3fa..169db84e 100644
--- a/plugins/QmcChem/e_curve_qmc.irp.f
+++ b/plugins/QmcChem/e_curve_qmc.irp.f
@@ -1,10 +1,12 @@
 program e_curve
   use bitmasks
   implicit none
-  integer :: i,j,k, nab, m, l
+  integer :: i,j,k, kk, nab, m, l
   double precision :: norm, E, hij, num, ci, cj
   integer, allocatable :: iorder(:)
   double precision , allocatable :: norm_sort(:)
+  PROVIDE mo_bielec_integrals_in_map
+
   nab = n_det_alpha_unique+n_det_beta_unique
   allocate ( norm_sort(0:nab), iorder(0:nab) )
 
@@ -60,7 +62,7 @@ program e_curve
     num = 0.d0
     norm = 0.d0
     m = 0
-    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,l,det_i,det_j,ci,cj,hij) REDUCTION(+:norm,m,num)
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(k,kk,l,det_i,det_j,ci,cj,hij) REDUCTION(+:norm,m,num)
     allocate( det_i(N_int,2), det_j(N_int,2))
     !$OMP DO SCHEDULE(guided)
     do k=1,n_det
@@ -68,15 +70,19 @@ program e_curve
         cycle
       endif
       ci = psi_bilinear_matrix_values(k,1)
-      det_i(:,1) = psi_det_alpha_unique(:,psi_bilinear_matrix_rows(k))
-      det_i(:,2) = psi_det_beta_unique(:,psi_bilinear_matrix_columns(k))
+      do kk=1,N_int
+        det_i(kk,1) = psi_det_alpha_unique(kk,psi_bilinear_matrix_rows(k))
+        det_i(kk,2) = psi_det_beta_unique(kk,psi_bilinear_matrix_columns(k))
+      enddo
       do l=1,n_det
         if (psi_bilinear_matrix_values(l,1) == 0.d0) then
           cycle
         endif
         cj = psi_bilinear_matrix_values(l,1)
-        det_j(:,1) = psi_det_alpha_unique(:,psi_bilinear_matrix_rows(l))
-        det_j(:,2) = psi_det_beta_unique(:,psi_bilinear_matrix_columns(l))
+        do kk=1,N_int
+          det_j(kk,1) = psi_det_alpha_unique(kk,psi_bilinear_matrix_rows(l))
+          det_j(kk,2) = psi_det_beta_unique(kk,psi_bilinear_matrix_columns(l))
+        enddo
         call i_h_j(det_i, det_j, N_int, hij)
         num = num + ci*cj*hij
       enddo
diff --git a/plugins/mrcc_selected/dressing.irp.f b/plugins/mrcc_selected/dressing.irp.f
new file mode 100644
index 00000000..c772e2aa
--- /dev/null
+++ b/plugins/mrcc_selected/dressing.irp.f
@@ -0,0 +1,1076 @@
+use bitmasks
+
+
+
+ BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ]
+  use bitmasks
+  implicit none
+  integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc
+  integer(bit_kind) :: mask(N_int, 2), omask(N_int, 2)
+  integer(bit_kind),allocatable :: buf(:,:,:)
+  logical :: ok
+  logical, external :: detEq
+  ! The four arrays are zeroed here and then accumulated by mrcc_part_dress, called once per (generator, hole pattern).
+  delta_ij_mrcc = 0d0
+  delta_ii_mrcc = 0d0
+  delta_ij_s2_mrcc = 0d0
+  delta_ii_s2_mrcc = 0d0
+  PROVIDE dij
+  provide hh_shortcut psi_det_size! lambda_mrcc
+  !$OMP PARALLEL DO default(none)  schedule(dynamic) &
+  !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) &
+  !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) &
+  !$OMP private(h, n, mask, omask, buf, ok, iproc)
+  do gen= 1, N_det_generators
+    allocate(buf(N_int, 2, N_det_non_ref))  ! thread-private scratch, allocated and freed on every iteration
+    iproc = omp_get_thread_num() + 1  ! NOTE(review): iproc is not read anywhere below in this provider
+    if(mod(gen, 1000) == 0) print *, "mrcc ", gen, "/", N_det_generators
+    do h=1, hh_shortcut(0)  ! hh_shortcut(0) holds the number of hole patterns
+      call apply_hole_local(psi_det_generators(1,1,gen), hh_exists(1, h), mask, ok, N_int)
+      if(.not. ok) cycle
+      omask = 0_bit_kind
+      if(hh_exists(1, h) /= 0) omask = mask  ! omask stays zero when the first entry of the hole pattern is 0
+      n = 1  ! next free slot in buf
+      do p=hh_shortcut(h), hh_shortcut(h+1)-1  ! particle patterns attached to hole pattern h
+        call apply_particle_local(mask, pp_exists(1, p), buf(1,1,n), ok, N_int)
+        if(ok) n = n + 1  ! keep the determinant just written into buf(:,:,n)
+        if(n > N_det_non_ref) stop "MRCC..."  ! no free slot left in buf
+      end do
+      n = n - 1  ! n is now the number of determinants stored in buf
+
+      if(n /= 0) then
+        call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask)
+      endif
+
+    end do
+    deallocate(buf)
+  end do
+  !$OMP END PARALLEL DO
+END_PROVIDER
+
+
+! subroutine blit(b1, b2)
+!   double precision :: b1(N_states,N_det_non_ref,N_det_ref), b2(N_states,N_det_non_ref,N_det_ref)
+!   b1 = b1 + b2
+! end subroutine
+
+
+subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask)
+ use bitmasks
+ implicit none
+ ! For generator i_generator: filter det_buffer into tq, then add each tq determinant's terms to the four delta_* arrays.
+  integer, intent(in)            :: i_generator,n_selected, Nint
+  double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref)
+  double precision, intent(inout) :: delta_ii_(N_states,N_det_ref)
+  double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref)
+  double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref)
+
+  integer(bit_kind), intent(in)  :: det_buffer(Nint,2,n_selected)  ! the n_selected candidate determinants
+  integer                        :: i,j,k,l,m
+  integer,allocatable            :: idx_alpha(:), degree_alpha(:)
+  logical                        :: good, fullMatch
+
+  integer(bit_kind),allocatable  :: tq(:,:,:)  ! determinants kept by filter_tq / filter_tq_micro (N_tq of them)
+  integer                        :: N_tq, c_ref ,degree
+
+  double precision               :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states)
+  double precision, allocatable  :: dIa_hla(:,:), dIa_sla(:,:)
+  double precision               :: haj, phase, phase2
+  double precision               :: f(N_states), ci_inv(N_states)
+  integer                        :: exc(0:2,2,2)
+  integer                        :: h1,h2,p1,p2,s1,s2
+  integer(bit_kind)              :: tmp_det(Nint,2)
+  integer                        :: iint, ipos
+  integer                        :: i_state, k_sd, l_sd, i_I, i_alpha
+  
+  integer(bit_kind),allocatable  :: miniList(:,:,:)
+  integer(bit_kind),intent(in)   :: key_mask(Nint, 2)  ! mask handed to the minilist/microlist builders
+  integer,allocatable            :: idx_miniList(:)
+  integer                        :: N_miniList, ni, leng
+  double precision, allocatable  :: hij_cache(:), sij_cache(:)  ! indexed by position in psi_non_ref
+  
+  integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:)
+  integer, allocatable           :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:)
+  integer :: mobiles(2), smallerlist
+  logical, external :: detEq, is_generable
+  !double precision, external :: get_dij, get_dij_index
+  
+
+  leng = max(N_det_generators, N_det_non_ref)  ! miniList is reused for generators first, then for psi_non_ref
+  allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref))
+  allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size))
+  !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint)
+  call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint)
+  
+!   if(fullMatch) then
+!     return
+!   end if
+  
+  allocate(ptr_microlist(0:mo_tot_num*2+1),  &
+      N_microlist(0:mo_tot_num*2) )
+  allocate(   microlist(Nint,2,N_minilist*4),               &
+    idx_microlist(N_minilist*4))
+  ! Build tq from det_buffer; the micro-list variant is used only when key_mask(1,1) is non-zero.
+  if(key_mask(1,1) /= 0) then
+    call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint)
+    call filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microlist,ptr_microlist,N_microlist,key_mask)
+  else
+    call filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_minilist)
+  end if
+  
+  
+  
+  deallocate(microlist, idx_microlist)  ! re-allocated below with the psi_non_ref minilist size
+  
+  allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref))
+  
+  ! |I>
+  
+  ! |alpha>
+  
+  if(N_tq > 0) then
+    call create_minilist(key_mask, psi_non_ref, miniList, idx_minilist, N_det_non_ref, N_minilist, Nint)
+    if(N_minilist == 0) return  ! allocatable locals are released automatically on return
+    
+    
+    if(key_mask(1,1) /= 0) then !!!!!!!!!!! NOT GENERAL !!!!!!!!!
+      allocate(microlist_zero(Nint,2,N_minilist), idx_microlist_zero(N_minilist))
+      
+      allocate(   microlist(Nint,2,N_minilist*4),               &
+        idx_microlist(N_minilist*4))
+      call create_microlist(miniList, N_minilist, key_mask, microlist, idx_microlist, N_microlist, ptr_microlist, Nint)
+      
+      ! Re-express the micro-list indices through idx_minilist
+      do i=0,mo_tot_num*2
+        do k=ptr_microlist(i),ptr_microlist(i+1)-1
+          idx_microlist(k) = idx_minilist(idx_microlist(k))
+        end do
+      end do
+      ! Copy the first N_microlist(0) micro-list entries to the head of microlist_zero
+      do l=1,N_microlist(0)
+        do k=1,Nint
+          microlist_zero(k,1,l) = microlist(k,1,l)
+          microlist_zero(k,2,l) = microlist(k,2,l)
+        enddo
+        idx_microlist_zero(l) = idx_microlist(l)
+      enddo
+    end if
+  end if
+      
+ 
+  do i_alpha=1,N_tq
+   if(key_mask(1,1) /= 0) then
+      call getMobiles(tq(1,1,i_alpha), key_mask, mobiles, Nint) 
+      
+      if(N_microlist(mobiles(1)) < N_microlist(mobiles(2))) then
+        smallerlist = mobiles(1)
+      else
+        smallerlist = mobiles(2)
+      end if
+      
+      ! Copy the shorter of the two mobiles' lists into microlist_zero, starting at position ptr_microlist(1)
+      do l=0,N_microlist(smallerlist)-1
+        microlist_zero(:,:,ptr_microlist(1) + l) = microlist(:,:,ptr_microlist(smallerlist) + l)
+        idx_microlist_zero(ptr_microlist(1) + l) = idx_microlist(ptr_microlist(smallerlist) + l)
+      end do
+      
+      call get_excitation_degree_vector(microlist_zero,tq(1,1,i_alpha),degree_alpha,Nint,N_microlist(smallerlist)+N_microlist(0),idx_alpha)
+      do j=1,idx_alpha(0)
+        idx_alpha(j) = idx_microlist_zero(idx_alpha(j))
+      end do
+      
+    else
+      call get_excitation_degree_vector(miniList,tq(1,1,i_alpha),degree_alpha,Nint,N_minilist,idx_alpha)
+      do j=1,idx_alpha(0)
+        idx_alpha(j) = idx_miniList(idx_alpha(j))
+      end do
+    end if
+    
+    ! Cache the i_h_j and get_s2 results between |alpha> and each selected psi_non_ref determinant
+    do l_sd=1,idx_alpha(0)
+      k_sd = idx_alpha(l_sd)
+      call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd))
+      call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd))
+    enddo
+    ! |I>
+    do i_I=1,N_det_ref
+      ! Find triples and quadruple grand parents
+      call get_excitation_degree(tq(1,1,i_alpha),psi_ref(1,1,i_I),degree,Nint)
+      if (degree > 4) then
+        cycle
+      endif
+      
+      do i_state=1,N_states
+        dIa(i_state) = 0.d0
+      enddo
+      
+      ! <I|  <>  |alpha>
+      do k_sd=1,idx_alpha(0)
+        ! Loop if lambda == 0
+        logical                        :: loop
+!         loop = .True.
+!         do i_state=1,N_states
+!           if (lambda_mrcc(i_state,idx_alpha(k_sd)) /= 0.d0) then
+!             loop = .False.
+!             exit
+!           endif
+!         enddo
+!         if (loop) then
+!           cycle
+!         endif
+        
+        call get_excitation_degree(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),degree,Nint)
+        if (degree > 2) then
+          cycle
+        endif
+        
+        ! <I| /k\ |alpha>
+        ! <I|H|k>
+        !hIk = hij_mrcc(idx_alpha(k_sd),i_I)
+        !         call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(k_sd)),Nint,hIk)
+        
+        do i_state=1,N_states
+          dIK(i_state) = dij(i_I, idx_alpha(k_sd), i_state)
+          !dIk(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(k_sd)), N_int) !!hIk * lambda_mrcc(i_state,idx_alpha(k_sd))
+          !dIk(i_state) = psi_non_ref_coef(idx_alpha(k_sd), i_state) / psi_ref_coef(i_I, i_state)
+        enddo
+        
+        
+        ! |l> = Exc(k -> alpha) |I>
+        call get_excitation(psi_non_ref(1,1,idx_alpha(k_sd)),tq(1,1,i_alpha),exc,degree,phase,Nint)
+        call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+        do k=1,N_int
+          tmp_det(k,1) = psi_ref(k,1,i_I)
+          tmp_det(k,2) = psi_ref(k,2,i_I)
+        enddo
+        logical :: ok
+        call apply_excitation(psi_ref(1,1,i_I), exc, tmp_det, ok, Nint)
+        if(.not. ok) cycle
+        
+        ! <I| \l/ |alpha>
+        do i_state=1,N_states
+          dka(i_state) = 0.d0
+        enddo
+        do l_sd=k_sd+1,idx_alpha(0)  ! only entries after k_sd are searched
+          call get_excitation_degree(tmp_det,psi_non_ref(1,1,idx_alpha(l_sd)),degree,Nint)
+          if (degree == 0) then
+            
+!             loop = .True.
+!             do i_state=1,N_states
+!               if (lambda_mrcc(i_state,idx_alpha(l_sd)) /= 0.d0) then
+!                 loop = .False.
+!                 exit
+!               endif
+!             enddo
+            loop = .false.  ! the lambda_mrcc filter above is commented out, so the branch below always runs
+            if (.not.loop) then
+              call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),exc,degree,phase2,Nint)
+              hIl = hij_mrcc(idx_alpha(l_sd),i_I)
+!                             call i_h_j(psi_ref(1,1,i_I),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hIl)
+              do i_state=1,N_states
+                dka(i_state) = dij(i_I, idx_alpha(l_sd), i_state) * phase * phase2
+                !dka(i_state) = get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,idx_alpha(l_sd)), N_int) * phase * phase2 !hIl * lambda_mrcc(i_state,idx_alpha(l_sd)) * phase * phase2
+                !dka(i_state) = psi_non_ref_coef(idx_alpha(l_sd), i_state) / psi_ref_coef(i_I, i_state) * phase * phase2 
+              enddo
+            endif
+
+            exit  ! stop at the first determinant found at excitation degree 0 from tmp_det
+          endif
+        enddo
+        do i_state=1,N_states
+          dIa(i_state) = dIa(i_state) + dIk(i_state) * dka(i_state)
+        enddo
+      enddo
+     
+      do i_state=1,N_states
+        ci_inv(i_state) = psi_ref_coef_inv(i_I,i_state)
+      enddo
+      do l_sd=1,idx_alpha(0)
+        k_sd = idx_alpha(l_sd)
+        hla = hij_cache(k_sd)
+        sla = sij_cache(k_sd)
+!        call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla)
+        do i_state=1,N_states
+          dIa_hla(i_state,k_sd) = dIa(i_state) * hla
+          dIa_sla(i_state,k_sd) = dIa(i_state) * sla
+        enddo
+      enddo
+      call omp_set_lock( psi_ref_lock(i_I) )  ! updates for reference i_I are done while holding its lock
+      do i_state=1,N_states
+        if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then
+          do l_sd=1,idx_alpha(0)
+            k_sd = idx_alpha(l_sd)
+            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
+            delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
+            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd)
+            delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
+          enddo
+        else
+          delta_ii_(i_state,i_I)  = 0.d0  ! |coefficient| below 1.d-3: diagonal term reset, off-diagonal terms halved
+          do l_sd=1,idx_alpha(0)
+            k_sd = idx_alpha(l_sd)
+            delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd)
+            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd)
+          enddo
+        endif
+      enddo
+      call omp_unset_lock( psi_ref_lock(i_I) )
+    enddo
+  enddo
+  deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache)
+  deallocate(miniList, idx_miniList)
+end
+
+
+
+
+ BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ]
+  use bitmasks
+  implicit none
+  integer                        :: ir, inr, ist
+  ! Copies the dressing selected by mrmode into delta_ij / delta_ii and their S2 counterparts.
+  !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc
+  ! The mrcepa0 arrays are stored (ref, non_ref, state), hence the index permutation in the first branch.
+  if (mrmode == 1) then
+    do ir = 1, N_det_ref
+      do ist = 1, N_states
+        delta_ii(ist,ir)    = delta_mrcepa0_ii(ir,ist)
+        delta_ii_s2(ist,ir) = delta_mrcepa0_ii_s2(ir,ist)
+      enddo
+      do inr = 1, N_det_non_ref
+        do ist = 1, N_states
+          delta_ij(ist,inr,ir)    = delta_mrcepa0_ij(ir,inr,ist)
+          delta_ij_s2(ist,inr,ir) = delta_mrcepa0_ij_s2(ir,inr,ist)
+        enddo
+      enddo
+    enddo
+  else if (mrmode == 2) then
+    do ir = 1, N_det_ref
+      do ist = 1, N_states
+        delta_ii(ist,ir)    = delta_ii_old(ist,ir)
+        delta_ii_s2(ist,ir) = delta_ii_s2_old(ist,ir)
+      enddo
+      do inr = 1, N_det_non_ref
+        do ist = 1, N_states
+          delta_ij(ist,inr,ir)    = delta_ij_old(ist,inr,ir)
+          delta_ij_s2(ist,inr,ir) = delta_ij_s2_old(ist,inr,ir)
+        enddo
+      enddo
+    enddo
+  else if (mrmode == 3) then
+    do ir = 1, N_det_ref
+      do ist = 1, N_states
+        delta_ii(ist,ir)    = delta_ii_mrcc(ist,ir)
+        delta_ii_s2(ist,ir) = delta_ii_s2_mrcc(ist,ir)
+      enddo
+      do inr = 1, N_det_non_ref
+        do ist = 1, N_states
+          delta_ij(ist,inr,ir)    = delta_ij_mrcc(ist,inr,ir)
+          delta_ij_s2(ist,inr,ir) = delta_ij_s2_mrcc(ist,inr,ir)
+        enddo
+      enddo
+    enddo
+
+    ! =-=-= BEGIN STATE AVERAGE
+!    do i = 1, N_det_ref
+!      delta_ii(:,i)= delta_ii_mrcc(1,i)
+!      delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i)
+!      do i_state = 2, N_states
+!        delta_ii(:,i) += delta_ii_mrcc(i_state,i)
+!        delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i)
+!      enddo
+!      do j = 1, N_det_non_ref
+!        delta_ij(:,j,i) = delta_ij_mrcc(1,j,i)
+!        delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i)
+!        do i_state = 2, N_states
+!          delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i)
+!          delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i)
+!        enddo
+!      end do
+!    end do
+!    delta_ij = delta_ij * (1.d0/dble(N_states))
+!    delta_ii = delta_ii * (1.d0/dble(N_states))
+    ! =-=-= END STATE AVERAGE
+    !
+    !       do i = 1, N_det_ref
+    !         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state)
+    !         do j = 1, N_det_non_ref
+    !           delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state) - delta_sub_ij(i,j,i_state)
+    !         end do
+    !       end do
+  else
+    stop "invalid mrmode"
+  end if
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ integer, HP, (2,N_det_non_ref) ]
+  integer :: inr  ! HP(1,inr) / HP(2,inr) receive the h / p outputs of getHP for psi_non_ref(:,:,inr)
+  do inr = 1, N_det_non_ref
+    call getHP(psi_non_ref(1,1,inr), HP(1,inr), HP(2,inr), N_int)
+  enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, cepa0_shortcut, (0:N_det_non_ref+1) ]
+&BEGIN_PROVIDER [ integer, det_cepa0_idx, (N_det_non_ref) ]
+&BEGIN_PROVIDER [ integer(bit_kind), det_cepa0_active, (N_int,2,N_det_non_ref) ]
+&BEGIN_PROVIDER [ integer(bit_kind), det_ref_active, (N_int,2,N_det_ref) ]
+&BEGIN_PROVIDER [ integer(bit_kind), active_sorb, (N_int,2) ]
+&BEGIN_PROVIDER [ integer(bit_kind), det_cepa0, (N_int,2,N_det_non_ref) ]
+&BEGIN_PROVIDER [ integer, nlink, (N_det_ref) ]
+&BEGIN_PROVIDER [ integer, linked, (N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ integer, blokMwen, (N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, searchance, (N_det_ref) ]
+&BEGIN_PROVIDER [ integer, child_num, (N_det_non_ref,N_det_ref) ]
+
+  use bitmasks
+  implicit none
+  ! Derives the active spin-orbital mask from psi_ref, groups psi_non_ref into blocks, and records reference/non-reference links.
+  integer(bit_kind),allocatable :: det_noactive(:,:,:)
+  integer, allocatable :: shortcut(:), idx(:)
+  integer(bit_kind) :: nonactive_sorb(N_int,2), det(N_int, 2)
+  integer i, II, j, k, n, ni, blok, degree
+  logical, external :: detEq
+  
+  allocate(det_noactive(N_int, 2, N_det_non_ref))
+  allocate(idx(N_det_non_ref), shortcut(0:N_det_non_ref+1))
+  print *, "pre start"
+  active_sorb(:,:) = 0_8
+  nonactive_sorb(:,:) = not(0_8)
+  ! active_sorb = (OR of all references) AND NOT (AND of all references); it stays zero when N_det_ref == 1
+  if(N_det_ref > 1) then
+    do i=1, N_det_ref
+    do k=1, N_int
+      active_sorb(k,1) = ior(psi_ref(k,1,i), active_sorb(k,1))
+      active_sorb(k,2) = ior(psi_ref(k,2,i), active_sorb(k,2))
+      nonactive_sorb(k,1) = iand(psi_ref(k,1,i), nonactive_sorb(k,1))
+      nonactive_sorb(k,2) = iand(psi_ref(k,2,i), nonactive_sorb(k,2))
+    end do
+    end do
+    do k=1, N_int
+      active_sorb(k,1) = iand(active_sorb(k,1), not(nonactive_sorb(k,1)))
+      active_sorb(k,2) = iand(active_sorb(k,2), not(nonactive_sorb(k,2)))
+    end do
+  end if
+  
+  ! Clear the active spin-orbitals in a copy of every non-reference determinant
+  do i=1, N_det_non_ref
+  do k=1, N_int
+    det_noactive(k,1,i) = iand(psi_non_ref(k,1,i), not(active_sorb(k,1)))
+    det_noactive(k,2,i) = iand(psi_non_ref(k,2,i), not(active_sorb(k,2))) 
+  end do
+  end do
+  ! NOTE(review): sort_dets_ab presumably reorders det_noactive in place and returns the permutation in det_cepa0_idx - confirm
+  call sort_dets_ab(det_noactive, det_cepa0_idx, cepa0_shortcut, N_det_non_ref, N_int)
+  
+  do i=1,N_det_non_ref
+    det_cepa0(:,:,i) = psi_non_ref(:,:,det_cepa0_idx(i))
+  end do
+  ! Rebuild cepa0_shortcut: entry 0 = number of blocks, entry b = first index of block b (runs of equal det_noactive)
+  cepa0_shortcut(0) = 1
+  cepa0_shortcut(1) = 1
+  do i=2,N_det_non_ref
+    if(.not. detEq(det_noactive(1,1,i), det_noactive(1,1,i-1), N_int)) then
+      cepa0_shortcut(0) += 1
+      cepa0_shortcut(cepa0_shortcut(0)) = i
+    end if
+  end do
+  cepa0_shortcut(cepa0_shortcut(0)+1) = N_det_non_ref+1  ! sentinel: one past the last determinant
+  ! Sort inside each block and compose the block-local permutation into det_cepa0_idx
+  if(.true.) then 
+  do i=1,cepa0_shortcut(0)
+    n = cepa0_shortcut(i+1) - cepa0_shortcut(i)  ! size of block i
+    call sort_dets_ab(det_cepa0(1,1,cepa0_shortcut(i)), idx, shortcut, n, N_int)
+    do k=1,n
+      idx(k) = det_cepa0_idx(cepa0_shortcut(i)-1+idx(k))
+    end do
+    det_cepa0_idx(cepa0_shortcut(i):cepa0_shortcut(i)+n-1) = idx(:n)
+  end do
+  end if
+  
+  
+  do i=1,N_det_ref
+  do k=1, N_int
+    det_ref_active(k,1,i) = iand(psi_ref(k,1,i), active_sorb(k,1))
+    det_ref_active(k,2,i) = iand(psi_ref(k,2,i), active_sorb(k,2))
+  end do
+  end do
+  
+  do i=1,N_det_non_ref
+  do k=1, N_int
+    det_cepa0_active(k,1,i) = iand(psi_non_ref(k,1,det_cepa0_idx(i)), active_sorb(k,1))
+    det_cepa0_active(k,2,i) = iand(psi_non_ref(k,2,det_cepa0_idx(i)), active_sorb(k,2))
+  end do
+  end do
+  ! Consistency check: det_cepa0(:,:,i) must equal psi_non_ref(:,:,det_cepa0_idx(i))
+  do i=1,N_det_non_ref
+    if(.not. detEq(psi_non_ref(1,1,det_cepa0_idx(i)), det_cepa0(1,1,i),N_int)) stop "STOOOP"
+  end do
+  ! For each reference J, record the det_cepa0 entries whose excitation degree from psi_ref(:,:,J) is <= 2
+  searchance = 0d0
+  child_num = 0
+  do J = 1, N_det_ref
+    nlink(J) = 0
+    do blok=1,cepa0_shortcut(0)
+    do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1
+      call get_excitation_degree(psi_ref(1,1,J),det_cepa0(1,1,k),degree,N_int)
+      if(degree <= 2) then
+        nlink(J) += 1
+        linked(nlink(J),J) = k
+        child_num(k, J) = nlink(J)
+        blokMwen(nlink(J),J) = blok
+        searchance(J) += 1d0 + log(dfloat(cepa0_shortcut(blok+1) - cepa0_shortcut(blok)))
+      end if
+    end do
+    end do
+  end do
+  print *, "pre done"
+END_PROVIDER
+
+ 
+!  BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ]
+!   use bitmasks
+!   implicit none
+!   integer :: i,j,k
+!   double precision :: Hjk, Hki, Hij, pre(N_det_ref), wall
+!   integer :: i_state, degree, npre, ipre(N_det_ref), npres(N_det_ref)
+!   
+! !   provide lambda_mrcc
+!   npres = 0
+!   delta_cas = 0d0
+!   call wall_time(wall)
+!   print *, "dcas ", wall
+!   do i_state = 1, N_states
+!     !!$OMP PARALLEL DO default(none) schedule(dynamic) private(pre,npre,ipre,j,k,Hjk,Hki,degree) shared(npres,lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref)
+!     do k=1,N_det_non_ref
+!       if(lambda_mrcc(i_state, k) == 0d0) cycle
+!       npre = 0
+!       do i=1,N_det_ref
+!         call i_h_j(psi_non_ref(1,1,k),psi_ref(1,1,i), N_int,Hki)
+!         if(Hki /= 0d0) then
+!           !!$OMP ATOMIC
+!           npres(i) += 1
+!           npre += 1
+!           ipre(npre) = i
+!           pre(npre) = Hki
+!         end if
+!       end do
+!       
+!       
+!       do i=1,npre
+!       do j=1,i
+!         !!$OMP ATOMIC
+!         delta_cas(ipre(i),ipre(j),i_state) += pre(i) * pre(j) * lambda_mrcc(i_state, k)
+!       end do
+!       end do
+!     end do
+!     !!$OMP END PARALLEL DO
+!     npre=0
+!     do i=1,N_det_ref
+!       npre += npres(i)
+!     end do
+!     !stop
+!     do i=1,N_det_ref
+!     do j=1,i
+!         delta_cas(j,i,i_state) = delta_cas(i,j,i_state)
+!     end do
+!     end do 
+!   end do
+!   
+!   call wall_time(wall)
+!   print *, "dcas", wall
+! !   stop
+!  END_PROVIDER
+ 
+ 
+ BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ]
+  use bitmasks
+  implicit none
+  integer :: i,j,k
+  double precision :: Sjk,Hjk, Hki, Hij
+  !double precision, external :: get_dij
+  integer i_state, degree
+  ! delta_cas(i,j,s) = sum_k Hjk * dij(i,k,s) and delta_cas_s2 likewise with Sjk; only j <= i is computed, then mirrored.
+  provide lambda_mrcc dIj
+  do i_state = 1, N_states
+    !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij)
+    do i=1,N_det_ref
+      do j=1,i
+        call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int)  ! NOTE(review): degree is not read afterwards in this provider
+        delta_cas(i,j,i_state) = 0d0
+        delta_cas_s2(i,j,i_state) = 0d0
+        do k=1,N_det_non_ref
+
+          call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk)
+          call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk)
+          
+          delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k)
+          delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k)
+        end do
+        delta_cas(j,i,i_state) = delta_cas(i,j,i_state)  ! mirror the (i,j) value into (j,i)
+        delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state)
+      end do
+    end do
+    !$OMP END PARALLEL DO
+  end do
+ END_PROVIDER
+ 
+ 
+
+
+logical function isInCassd(a,Nint)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: a(Nint,2)
+  integer(bit_kind) :: msk_inac, msk_virt
+  integer :: iw, isp, cnt_p, cnt_h
+  ! .true. when determinant a has at most 2 electrons in non-active spin-orbitals outside HF_bitmask
+  ! and at most 2 vacancies in non-active spin-orbitals of HF_bitmask; .false. otherwise.
+  isInCassd = .false.
+  ! Electrons of a in spin-orbitals that are neither in HF_bitmask nor in active_sorb
+  cnt_p = 0
+  do isp=1,2
+    do iw=1,Nint
+      msk_virt = not(ior(HF_bitmask(iw,isp), active_sorb(iw,isp)))
+      cnt_p += popcnt(iand(a(iw,isp), msk_virt))
+      if(cnt_p > 2) return
+    end do
+  end do
+  ! Spin-orbitals of HF_bitmask outside active_sorb that are empty in a
+  cnt_h = 0
+  do isp=1,2
+    do iw=1,Nint
+      msk_inac = iand(HF_bitmask(iw,isp), not(active_sorb(iw,isp)))
+      cnt_h += popcnt(iand(msk_inac, not(a(iw,isp))))
+      if(cnt_h > 2) return
+    end do
+  end do
+  isInCassd = .true.
+end function
+
+
+subroutine getHP(a,h,p,Nint)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: a(Nint,2)
+  integer, intent(out) :: h, p
+  integer(bit_kind) :: inac, virt
+  integer :: ni, i, deg
+  ! Counts, for determinant a, the electrons in non-active spin-orbitals outside HF_bitmask (p)
+  ! and the empty non-active spin-orbitals of HF_bitmask (h).
+  ! Counting stops as soon as a count exceeds 2, so a returned value above 2 only means "more than two".
+  h = 0
+  p = 0
+   
+  deg = 0
+  lp : do i=1,2
+    do ni=1,Nint
+      virt = iand(not(HF_bitmask(ni,i)), not(active_sorb(ni,i)))
+      deg += popcnt(iand(virt, a(ni,i)))
+      if(deg > 2) exit lp
+    end do
+  end do lp
+  p = deg
+
+  deg = 0
+  lh : do i=1,2
+    do ni=1,Nint
+      inac = iand(HF_bitmask(ni,i), not(active_sorb(ni,i)))
+      deg += popcnt(xor(iand(inac,a(ni,i)), inac))
+      if(deg > 2) exit lh
+    end do
+  end do lh
+  h = deg
+  ! The unit is declared as a subroutine, so it must be closed with "end subroutine".
+end subroutine
+
+
+ BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ]
+  use bitmasks
+  implicit none
+  ! MR-CEPA0 dressing built from delta_cas / delta_cas_s2 and dij, accumulated block by block (cepa0_shortcut) in parallel.
+  integer                         :: i_state, i, i_I, J, k, degree, degree2, m, l, deg, ni
+  integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref)
+  logical                         :: ok
+  double precision                :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1)
+  double precision                :: contrib, contrib2,  contrib_s2, contrib2_s2,  HIIi, HJk, wall
+  integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
+  integer(bit_kind)               :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2)
+  integer(bit_kind),allocatable   :: sortRef(:,:,:)
+  integer, allocatable            :: idx_sorted_bit(:)
+  integer, external               :: get_index_in_psi_det_sorted_bit, searchDet
+  logical, external               :: is_in_wavefunction, detEq
+  !double precision, external      :: get_dij
+  integer :: II, blok
+  integer*8, save :: notf = 0
+
+  call wall_time(wall)
+  allocate(idx_sorted_bit(N_det), sortRef(N_int,2,N_det_ref))
+  ! Sorted copy of the active parts of the references, searched with searchDet in the main loop
+  sortRef(:,:,:) = det_ref_active(:,:,:)
+  call sort_det(sortRef, sortRefIdx, N_det_ref, N_int)
+  ! NOTE(review): idx_sorted_bit is filled here but never read again in this provider
+  idx_sorted_bit(:) = -1
+  do i=1,N_det_non_ref
+    idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i
+  enddo
+    
+  ! To provide everything
+  contrib = dij(1, 1, 1)
+  
+  delta_mrcepa0_ii(:,:) = 0d0
+  delta_mrcepa0_ij(:,:,:) = 0d0
+  delta_mrcepa0_ii_s2(:,:) = 0d0
+  delta_mrcepa0_ij_s2(:,:,:) = 0d0
+
+  do i_state = 1, N_states
+    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2)       &
+    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) &
+    !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active)     &
+    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) &
+    !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij)
+    do blok=1,cepa0_shortcut(0)
+    do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1
+      do II=1,N_det_ref
+        call get_excitation_degree(psi_ref(1,1,II),psi_non_ref(1,1,det_cepa0_idx(i)),degree,N_int)
+        if (degree > 2 ) cycle
+        ! Active bits set in reference II but not in i (made_hole), and set in i but not in II (made_particle)
+        do ni=1,N_int
+          made_hole(ni,1) = iand(det_ref_active(ni,1,II), xor(det_cepa0_active(ni,1,i), det_ref_active(ni,1,II)))
+          made_hole(ni,2) = iand(det_ref_active(ni,2,II), xor(det_cepa0_active(ni,2,i), det_ref_active(ni,2,II)))
+          
+          made_particle(ni,1) = iand(det_cepa0_active(ni,1,i), xor(det_cepa0_active(ni,1,i), det_ref_active(ni,1,II)))
+          made_particle(ni,2) = iand(det_cepa0_active(ni,2,i), xor(det_cepa0_active(ni,2,i), det_ref_active(ni,2,II)))
+        end do
+      
+  kloop: do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1 !i
+          !if(lambda_mrcc(i_state, det_cepa0_idx(k)) == 0d0) cycle
+          ! Skip k unless all made_particle bits are set and no made_hole bit is set in its active part
+          do ni=1,N_int
+            if(iand(made_hole(ni,1), det_cepa0_active(ni,1,k)) /= 0) cycle kloop
+            if(iand(made_particle(ni,1), det_cepa0_active(ni,1,k)) /= made_particle(ni,1)) cycle kloop
+            if(iand(made_hole(ni,2), det_cepa0_active(ni,2,k)) /= 0) cycle kloop
+            if(iand(made_particle(ni,2), det_cepa0_active(ni,2,k)) /= made_particle(ni,2)) cycle kloop
+          end do
+          do ni=1,N_int
+            myActive(ni,1) = xor(det_cepa0_active(ni,1,k), made_hole(ni,1))
+            myActive(ni,1) = xor(myActive(ni,1), made_particle(ni,1))
+            myActive(ni,2) = xor(det_cepa0_active(ni,2,k), made_hole(ni,2))
+            myActive(ni,2) = xor(myActive(ni,2), made_particle(ni,2))
+          end do
+          ! Look up the reference whose active part equals myActive; -1 means there is none
+          j = searchDet(sortRef, myActive, N_det_ref, N_int)
+          if(j == -1) then
+           cycle
+          end if
+          j = sortRefIdx(j)
+          !$OMP ATOMIC
+          notf = notf+1
+
+!          call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk)
+          contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
+          contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
+          ! ATOMIC covers only the statement that follows it, so every shared update below carries its own directive
+          if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then
+            contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
+            contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
+            !$OMP ATOMIC
+            delta_mrcepa0_ii(J,i_state) -= contrib2 
+            !$OMP ATOMIC
+            delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 
+          else
+            contrib = contrib * 0.5d0
+            contrib_s2 = contrib_s2 * 0.5d0
+          end if
+          !$OMP ATOMIC
+          delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib
+          !$OMP ATOMIC
+          delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2
+        end do kloop
+      end do
+    end do
+    end do
+    !$OMP END PARALLEL DO
+  end do
+  deallocate(idx_sorted_bit)
+  call wall_time(wall)
+  print *, "cepa0", wall, notf
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, delta_sub_ij, (N_det_ref,N_det_non_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_sub_ii, (N_det_ref, N_states) ]
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+! delta_sub_ij(II,i,state) and delta_sub_ii(II,state) : dressing terms accumulated over
+! reference determinants J, II and non-reference determinants i, k.
+!
+! Each retained term is
+!   HJk * HIk * lambda_mrcc(state,k) * HIl * lambda_mrcc(state,l)
+! where l is the non-reference determinant obtained by applying the J->i
+! excitation to the reference II.
+  END_DOC
+
+  integer                         :: i_state, i, i_I, J, k, degree, degree2, l, deg, ni
+  integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_
+  logical                         :: ok
+  double precision                :: phase_Ji, phase_Ik, phase_Ii
+  double precision                :: contrib, contrib2, delta_IJk, HJk, HIk, HIl
+  integer, dimension(0:2,2,2)     :: exc_Ik, exc_Ji, exc_Ii
+  integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2)
+  integer, allocatable            :: idx_sorted_bit(:)
+  integer, external               :: get_index_in_psi_det_sorted_bit
+
+  integer :: II, blok
+
+  provide delta_cas lambda_mrcc
+
+  ! Map : position in psi_det_sorted_bit -> index in psi_non_ref
+  ! (-1 for determinants that are not in psi_non_ref)
+  allocate(idx_sorted_bit(N_det))
+  idx_sorted_bit(:) = -1
+  do i=1,N_det_non_ref
+    idx_sorted_bit(get_index_in_psi_det_sorted_bit(psi_non_ref(1,1,i), N_int)) = i
+  enddo
+
+  ! Zero the accumulators once, before the loop over states. Zeroing them inside
+  ! the loop would erase the states already computed when N_states > 1.
+  delta_sub_ij(:,:,:) = 0d0
+  delta_sub_ii(:,:) = 0d0
+
+  do i_state = 1, N_states
+
+    provide mo_bielec_integrals_in_map
+
+
+    !$OMP PARALLEL DO default(none) schedule(dynamic,10) shared(delta_sub_ij, delta_sub_ii)       &
+    !$OMP private(i, J, k, degree, degree2, l, deg, ni)       &
+    !$OMP private(p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_)     &
+    !$OMP private(ok, phase_Ji, phase_Ik, phase_Ii, contrib2, contrib, delta_IJk, HJk, HIk, HIl, exc_Ik, exc_Ji, exc_Ii) &
+    !$OMP private(det_tmp, det_tmp2, II, blok)    &
+    !$OMP shared(idx_sorted_bit, N_det_non_ref, N_det_ref, N_int, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef)   &
+    !$OMP shared(i_state,lambda_mrcc, hf_bitmask, active_sorb)
+    do i=1,N_det_non_ref
+      if(mod(i,1000) == 0) print *, i, "/", N_det_non_ref
+      do J=1,N_det_ref
+        call get_excitation(psi_ref(1,1,J),psi_non_ref(1,1,i),exc_Ji,degree,phase_Ji,N_int)
+        if(degree == -1) cycle
+
+
+        do II=1,N_det_ref
+          ! l : non-reference determinant reached from II by the J->i excitation
+          call apply_excitation(psi_ref(1,1,II),exc_Ji,det_tmp,ok,N_int)
+
+          if(.not. ok) cycle
+          l = get_index_in_psi_det_sorted_bit(det_tmp, N_int)
+          if(l == 0) cycle
+          l = idx_sorted_bit(l)
+          ! The determinant is in the wave function but not in psi_non_ref :
+          ! -1 must not be used to index lambda_mrcc / psi_non_ref_coef
+          if(l == -1) cycle
+
+          call i_h_j(psi_ref(1,1,II), det_tmp, N_int, HIl)
+
+          do k=1,N_det_non_ref
+            if(lambda_mrcc(i_state, k) == 0d0) cycle
+            call get_excitation(psi_ref(1,1,II),psi_non_ref(1,1,k),exc_Ik,degree2,phase_Ik,N_int)
+
+            det_tmp(:,:) = 0_bit_kind
+            det_tmp2(:,:) = 0_bit_kind
+
+            ! det_tmp is used as scratch : bits outside active_sorb where k (resp. i)
+            ! differs from HF_bitmask. ok stays .true. only if these two patterns
+            ! have no bit in common, for both spin parts.
+            ok = .true.
+            do ni=1,N_int
+              det_tmp(ni,1) = iand(xor(HF_bitmask(ni,1), psi_non_ref(ni,1,k)), not(active_sorb(ni,1)))
+              det_tmp(ni,2) = iand(xor(HF_bitmask(ni,1), psi_non_ref(ni,1,i)), not(active_sorb(ni,1)))
+              ok = ok .and. (popcnt(det_tmp(ni,1)) + popcnt(det_tmp(ni,2)) == popcnt(xor(det_tmp(ni,1), det_tmp(ni,2))))
+
+              det_tmp(ni,1) = iand(xor(HF_bitmask(ni,2), psi_non_ref(ni,2,k)), not(active_sorb(ni,2)))
+              det_tmp(ni,2) = iand(xor(HF_bitmask(ni,2), psi_non_ref(ni,2,i)), not(active_sorb(ni,2)))
+              ok = ok .and. (popcnt(det_tmp(ni,1)) + popcnt(det_tmp(ni,2)) == popcnt(xor(det_tmp(ni,1), det_tmp(ni,2))))
+            end do
+
+            ! Only pairs (i,k) sharing at least one such bit are kept
+            if(ok) cycle
+
+
+            call i_h_j(psi_ref(1,1,J), psi_non_ref(1,1,k), N_int, HJk)
+            call i_h_j(psi_ref(1,1,II), psi_non_ref(1,1,k), N_int, HIk)
+            if(HJk == 0) cycle
+            !assert HIk == 0
+            delta_IJk = HJk * HIk * lambda_mrcc(i_state, k)
+            ! Keep the term only when the II->k excitation cannot be applied to i
+            call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int)
+            if(ok) cycle
+            contrib = delta_IJk * HIl * lambda_mrcc(i_state,l)
+            if(dabs(psi_ref_coef(II,i_state)).ge.1.d-3) then
+              contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state)
+              !$OMP ATOMIC
+              delta_sub_ii(II,i_state) -= contrib2
+            else
+              contrib = contrib * 0.5d0
+            endif
+            !$OMP ATOMIC
+            delta_sub_ij(II, i, i_state) += contrib
+          end do
+        end do
+      end do
+    end do
+    !$OMP END PARALLEL DO
+  end do
+  deallocate(idx_sorted_bit)
+END_PROVIDER
+
+
+subroutine set_det_bit(det, p, s)
+   implicit none
+   BEGIN_DOC
+! Sets bit number p (counted from 1) in column s of the bit string det
+   END_DOC
+   integer(bit_kind),intent(inout) :: det(N_int, 2)
+   integer, intent(in) :: p, s
+   integer :: p0, iword, ibit
+
+   ! Zero-based bit number, split into a word index and a bit offset in that word
+   p0    = p-1
+   iword = p0/bit_kind_size + 1
+   ibit  = mod(p0, bit_kind_size)
+
+   det(iword,s) = ibset(det(iword,s), ibit)
+end subroutine
+
+
+ BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ]
+&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ]
+  implicit none
+  BEGIN_DOC
+! h_cache(i,j)  : result of i_h_j  between psi_ref(:,:,i) and psi_non_ref(:,:,j)
+!
+! s2_cache(i,j) : result of get_s2 between psi_ref(:,:,i) and psi_non_ref(:,:,j)
+  END_DOC
+  integer :: i,j
+  ! The first index runs in the inner loop so that both arrays are filled
+  ! in storage (column-major) order. Every (i,j) element is independent.
+  do j=1,N_det_non_ref
+    do i=1,N_det_ref
+      call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j))
+      call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j))
+    end do
+  end do
+END_PROVIDER
+
+
+
+subroutine filter_tq(i_generator,n_selected,det_buffer,Nint,tq,N_tq,miniList,N_miniList)
+
+ use bitmasks
+ implicit none
+  BEGIN_DOC
+! Copies into tq the determinants of det_buffer which
+!
+! * are not generable (is_generable) from any of the first N_miniList determinants of miniList,
+!
+! * have no degree < 3 among those returned by get_excitation_degree_vector with respect to psi_ref,
+!
+! * are not already in the wave function (is_in_wavefunction).
+!
+! N_tq is the number of determinants stored in tq. i_generator is not used.
+  END_DOC
+
+  integer, intent(in)            :: i_generator,n_selected, Nint
+
+  integer(bit_kind), intent(in)  :: det_buffer(Nint,2,n_selected)
+  integer                        :: i,k
+  logical                        :: is_in_wavefunction
+  integer,allocatable            :: degree(:)
+  integer,allocatable            :: idx(:)
+  logical                        :: good
+
+  integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out)
+  integer, intent(out)           :: N_tq
+
+  logical, external              :: is_generable
+
+  integer(bit_kind),intent(in)  :: miniList(Nint,2,N_det_generators)
+  integer,intent(in)            :: N_miniList
+
+  allocate(degree(psi_det_size))
+  allocate(idx(0:psi_det_size))
+  N_tq = 0
+
+  i_loop : do i=1,N_selected
+    do k=1, N_minilist
+      if(is_generable(miniList(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop
+    end do
+
+    ! Select determinants that are triple or quadruple excitations
+    ! from the ref
+    good = .True.
+    call get_excitation_degree_vector(psi_ref,det_buffer(1,1,i),degree,Nint,N_det_ref,idx)
+    ! good=(idx(0) == 0) would be enough as long as get_excitation_degree_vector
+    ! does not return the degrees > 2
+    do k=1,idx(0)
+      if (degree(k) < 3) then
+        good = .False.
+        exit
+      endif
+    enddo
+    if (good) then
+      if (.not. is_in_wavefunction(det_buffer(1,1,i),Nint)) then
+        N_tq += 1
+        ! tq and det_buffer are dimensioned with the dummy argument Nint
+        do k=1,Nint
+          tq(k,1,N_tq) = det_buffer(k,1,i)
+          tq(k,2,N_tq) = det_buffer(k,2,i)
+        enddo
+      endif
+    endif
+  enddo i_loop
+
+  deallocate(degree, idx)
+end
+
+
+subroutine filter_tq_micro(i_generator,n_selected,det_buffer,Nint,tq,N_tq,microlist,ptr_microlist,N_microlist,key_mask)
+
+ use bitmasks
+ implicit none
+  BEGIN_DOC
+! Same filtering as filter_tq, but the is_generable test is done against
+!
+! * the smaller of the two microlists selected by getMobiles for the determinant,
+!
+! * the first N_microlist(0) determinants of microlist.
+!
+! Determinants that pass, have no degree < 3 with respect to psi_ref and are not
+! in the wave function are copied into tq. N_tq is the number of determinants
+! stored in tq. i_generator is not used.
+  END_DOC
+
+  integer, intent(in)            :: i_generator,n_selected, Nint
+
+  integer(bit_kind), intent(in)  :: det_buffer(Nint,2,n_selected)
+  integer                        :: i,k
+  logical                        :: is_in_wavefunction
+  integer,allocatable            :: degree(:)
+  integer,allocatable            :: idx(:)
+  logical                        :: good
+
+  integer(bit_kind), intent(inout) :: tq(Nint,2,n_selected) !! intent(out)
+  integer, intent(out)           :: N_tq
+
+  logical, external              :: is_generable
+
+  integer(bit_kind),intent(in)  :: microlist(Nint,2,*)
+  integer,intent(in)  :: ptr_microlist(0:*)
+  integer,intent(in)            :: N_microlist(0:*)
+  integer(bit_kind),intent(in)   :: key_mask(Nint, 2)
+
+  integer :: mobiles(2), smallerlist
+
+
+  allocate(degree(psi_det_size))
+  allocate(idx(0:psi_det_size))
+  N_tq = 0
+
+  i_loop : do i=1,N_selected
+    ! Pick the shorter of the two microlists returned for this determinant
+    call getMobiles(det_buffer(1,1,i), key_mask, mobiles, Nint)
+    if(N_microlist(mobiles(1)) < N_microlist(mobiles(2))) then
+      smallerlist = mobiles(1)
+    else
+      smallerlist = mobiles(2)
+    end if
+
+    if(N_microlist(smallerlist) > 0) then
+      do k=ptr_microlist(smallerlist), ptr_microlist(smallerlist)+N_microlist(smallerlist)-1
+        if(is_generable(microlist(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop
+      end do
+    end if
+
+    if(N_microlist(0) > 0) then
+      do k=1, N_microlist(0)
+        if(is_generable(microlist(1,1,k), det_buffer(1,1,i), Nint)) cycle i_loop
+      end do
+    end if
+
+    ! Select determinants that are triple or quadruple excitations
+    ! from the ref
+    good = .True.
+    call get_excitation_degree_vector(psi_ref,det_buffer(1,1,i),degree,Nint,N_det_ref,idx)
+    ! good=(idx(0) == 0) would be enough as long as get_excitation_degree_vector
+    ! does not return the degrees > 2
+    do k=1,idx(0)
+      if (degree(k) < 3) then
+        good = .False.
+        exit
+      endif
+    enddo
+    if (good) then
+      if (.not. is_in_wavefunction(det_buffer(1,1,i),Nint)) then
+        N_tq += 1
+        ! tq and det_buffer are dimensioned with the dummy argument Nint
+        do k=1,Nint
+          tq(k,1,N_tq) = det_buffer(k,1,i)
+          tq(k,2,N_tq) = det_buffer(k,2,i)
+        enddo
+      endif
+    endif
+  enddo i_loop
+
+  deallocate(degree, idx)
+end
+
+
+
+
diff --git a/plugins/mrcc_selected/dressing_slave.irp.f b/plugins/mrcc_selected/dressing_slave.irp.f
new file mode 100644
index 00000000..c2e5dd55
--- /dev/null
+++ b/plugins/mrcc_selected/dressing_slave.irp.f
@@ -0,0 +1,601 @@
+subroutine mrsc2_dressing_slave_tcp(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Task for parallel MR-SC2 : runs mrsc2_dressing_slave with thread=0 and iproc=i
+! (presumably the worker reached through TCP -- confirm in new_zmq_push_socket)
+  END_DOC
+  call mrsc2_dressing_slave(0,i)
+end
+
+
+subroutine mrsc2_dressing_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+  BEGIN_DOC
+! Task for parallel MR-SC2 : runs mrsc2_dressing_slave with thread=1 and iproc=i
+! (presumably the worker running inside the master process -- confirm in new_zmq_push_socket)
+  END_DOC
+  call mrsc2_dressing_slave(1,i)
+end
+
+subroutine mrsc2_dressing_slave(thread,iproc)
+  use f77_zmq
+
+  implicit none
+  BEGIN_DOC
+! Task for parallel MR-SC2
+!
+! Worker loop : fetches tasks from the task server until task_id == 0.
+! Each task is the string "i_I J k1 k2" : a pair of reference determinants and a
+! range kk = k1..k2 of entries of linked(:, i_I). The contributions are accumulated
+! in delta / delta_s2 and sent with push_mrsc2_results.
+!
+! thread is forwarded to new_zmq_push_socket, connect_to_taskserver and
+! end_zmq_push_socket. iproc is not used in this routine.
+  END_DOC
+  integer,  intent(in)            :: thread, iproc
+!   integer                        :: j,l
+  integer                        :: rc 
+
+  integer                        :: worker_id, task_id 
+  character*(512)                :: task 
+ 
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket 
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket 
+ 
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket 
+  integer(ZMQ_PTR)               :: zmq_socket_push 
+
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:)
+  
+
+
+  ! NOTE: Fortran is case-insensitive, so i_I and I_i below are the same variable
+  integer                         :: i_state, i, i_I, J, k, k2, k1, kk, ll, degree, degree2, m, l, deg, ni, m2
+  integer                         :: n(2)
+  integer                         :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, kn
+  logical                         :: ok
+  double precision                :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al
+  double precision                :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states)
+  double precision                :: contrib, contrib_s2, wall, iwall
+  double precision, allocatable   :: dleat(:,:,:), dleat_s2(:,:,:)
+  integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
+  integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt
+  integer, external               :: get_index_in_psi_det_sorted_bit, searchDet, detCmp
+  logical, external               :: is_in_wavefunction, isInCassd, detEq
+  integer,allocatable :: komon(:)
+  logical :: komoned
+  !double precision, external :: get_dij
+     
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
+
+  ! Third index : 1 and 2 select the two accumulators of the (i_I, J) pair.
+  ! Row 0 of delta / delta_s2 holds the terms scaled by ci_inv / cj_inv.
+  allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2))
+  allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2))
+  ! komon(0) is the number of entries stored in komon(1:)
+  allocate(komon(0:N_det_non_ref))
+
+  do 
+    call get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task)
+    ! task_id == 0 : no task left
+    if (task_id == 0) exit
+    read (task,*) i_I, J, k1, k2
+    ! ci_inv / cj_inv are only used below when the coefficient is >= 5.d-5 in magnitude
+    do i_state=1, N_states
+      ci_inv(i_state) = 1.d0 / psi_ref_coef(i_I,i_state)
+      cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state)
+    end do
+    ! Reset only the part of the accumulators used for this pair
+    n = 0
+    delta(:,0,:) = 0d0
+    delta(:,:nlink(J),1) = 0d0
+    delta(:,:nlink(i_I),2) = 0d0
+    delta_s2(:,0,:) = 0d0
+    delta_s2(:,:nlink(J),1) = 0d0
+    delta_s2(:,:nlink(i_I),2) = 0d0
+    komon(0) = 0
+    komoned = .false.
+    
+    
+    
+    
+    do kk = k1, k2
+      k = det_cepa0_idx(linked(kk, i_I))
+      blok = blokMwen(kk, i_I)
+      
+      call get_excitation(psi_ref(1,1,i_I),psi_non_ref(1,1,k),exc_Ik,degree,phase_Ik,N_int)
+    
+      if(J /= i_I) then
+        ! Apply the i_I->k excitation to J, then look the result up in block blok of det_cepa0
+        call apply_excitation(psi_ref(1,1,J),exc_Ik,det_tmp2,ok,N_int)
+        if(.not. ok) cycle
+          
+        l = searchDet(det_cepa0(1,1,cepa0_shortcut(blok)), det_tmp2, cepa0_shortcut(blok+1)-cepa0_shortcut(blok), N_int)
+        ! searchDet returned -1 : not found
+        if(l == -1) cycle
+        ll = cepa0_shortcut(blok)-1+l
+        l = det_cepa0_idx(ll)
+        ll = child_num(ll, J)
+      else
+        l = k
+        ll = kk
+      end if
+      
+        
+      ! Built once per task, at the first kk that reaches this point
+      if(.not. komoned) then
+        m = 0
+        m2 = 0
+          
+        ! Simultaneous walk over linked(:,i_I) and linked(:,J), keeping the entries
+        ! present in both (this only works if both lists are sorted increasingly --
+        ! TODO confirm where linked is built)
+        do while(m < nlink(i_I) .and. m2 < nlink(J))
+          m += 1
+          m2 += 1
+          if(linked(m, i_I) < linked(m2, J)) then
+            m2 -= 1
+            cycle
+          else if(linked(m, i_I) > linked(m2, J)) then
+            m -= 1
+            cycle
+          end if
+          i = det_cepa0_idx(linked(m, i_I))
+          
+          if(h_cache(J,i) == 0.d0) cycle
+          if(h_cache(i_I,i) == 0.d0) cycle
+          
+          komon(0) += 1
+          kn = komon(0)
+          komon(kn) = i
+          
+          ! Both slots of dleat / dleat_s2 receive the same value
+          do i_state = 1,N_states
+            dkI = h_cache(J,i) * dij(i_I, i, i_state)
+            dleat(i_state, kn, 1) = dkI
+            dleat(i_state, kn, 2) = dkI
+
+            dkI = s2_cache(J,i) * dij(i_I, i, i_state)
+            dleat_s2(i_state, kn, 1) = dkI
+            dleat_s2(i_state, kn, 2) = dkI
+          end do
+
+        end do
+          
+        komoned = .true.
+      end if
+      
+      integer :: hpmin(2)
+      hpmin(1) = 2 - HP(1,k)
+      hpmin(2) = 2 - HP(2,k)
+
+      do m = 1, komon(0)
+        
+        i = komon(m)
+        ! Skip i when HP(1,i)+HP(1,k) <= 2 and HP(2,i)+HP(2,k) <= 2
+        if(HP(1,i) <= hpmin(1) .and. HP(2,i) <= hpmin(2) ) then
+          cycle
+        end if
+        
+        ! Only det i to which the i_I->k excitation can be applied contribute
+        call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int)
+        if(.not. ok) cycle
+          
+        do i_state = 1, N_states 
+          contrib =  dij(i_I, k, i_state) * dleat(i_state, m, 2)
+          contrib_s2 =  dij(i_I, k, i_state) * dleat_s2(i_state, m, 2)
+          delta(i_state,ll,1) += contrib
+          delta_s2(i_state,ll,1) += contrib_s2
+          if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then
+            delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
+            delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
+          endif
+          
+          ! Second accumulator only for i_I /= J (I_i is the same variable as i_I)
+          if(I_i == J) cycle
+          contrib =  dij(J, l, i_state) * dleat(i_state, m, 1)
+          contrib_s2 =  dij(J, l, i_state) * dleat_s2(i_state, m, 1)
+          delta(i_state,kk,2) += contrib
+          delta_s2(i_state,kk,2) += contrib_s2
+          if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
+            delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
+            delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
+          end if
+        enddo !i_state
+      end do ! m
+    end do ! kk
+
+      
+      ! Send the result of this task, then tell the task server it is done
+      call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
+      call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
+ 
+!     end if
+    
+  enddo
+
+  ! Only delta is explicitly released here
+  deallocate(delta)
+
+  call disconnect_from_taskserver(zmq_to_qp_run_socket,zmq_socket_push,worker_id)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+
+end
+
+
+subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
+  use f77_zmq 
+  implicit none 
+  BEGIN_DOC 
+! Sends the MR-SC2 dressing contributions of one task through the push socket.
+!
+! Message layout : i_I, J, then for kk=1,2 : n(kk) and, if n(kk) /= 0,
+! delta(:,0:n(kk),kk), delta_s2(:,0:n(kk),kk) and idx(1:n(kk),kk), and finally task_id.
+!
+! delta and delta_s2 are compacted in place : only the entries that are non-zero
+! for at least one state are kept, and idx holds their index in psi_non_ref.
+  END_DOC 
+
+  integer, intent(in)            :: i_I, J
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push 
+  double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
+  integer, intent(in)            :: task_id 
+  integer                        :: rc , i_state,  i, kk, li
+  integer,allocatable            :: idx(:,:)
+  integer                        :: n(2)
+  logical :: ok
+
+  allocate(idx(N_det_non_ref,2))
+  rc = f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_send( zmq_socket_push, i_I, 4, ZMQ_SNDMORE)' 
+    stop 'error' 
+  endif 
+
+  rc = f77_zmq_send( zmq_socket_push, J, 4, ZMQ_SNDMORE) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_send( zmq_socket_push, J, 4, ZMQ_SNDMORE)' 
+    stop 'error' 
+  endif 
+
+
+  do kk=1,2
+    n(kk)=0
+    ! Slot 1 is indexed along linked(:,J), slot 2 along linked(:,i_I)
+    if(kk == 1) li = nlink(j)
+    if(kk == 2) li = nlink(i_I)
+    do i=1, li
+      ! Keep entry i if any state has a non-zero delta or delta_s2
+      ok = .false.
+      do i_state=1,N_states
+        if(delta(i_state, i, kk) /= 0d0 .or. delta_s2(i_state, i, kk) /= 0d0) then
+          ok = .true.
+          exit
+        end if
+      end do
+
+      if(ok) then
+        n(kk) += 1
+        if(kk == 1) then
+          idx(n(1),1) = det_cepa0_idx(linked(i, J))
+        else
+          idx(n(2),2) = det_cepa0_idx(linked(i, i_I))
+        end if
+
+        ! Compact both arrays the same way (n(kk) <= i, so nothing still needed
+        ! is overwritten). delta_s2 must follow delta : the receiver reads
+        ! delta_s2(:,l,kk) together with idx(l,kk).
+        do i_state=1, N_states
+          delta(i_state, n(kk), kk) = delta(i_state, i, kk)
+          delta_s2(i_state, n(kk), kk) = delta_s2(i_state, i, kk)
+        end do
+      end if
+    end do
+
+    rc = f77_zmq_send( zmq_socket_push, n(kk), 4, ZMQ_SNDMORE) 
+    if (rc /= 4) then 
+      print *, irp_here,  'f77_zmq_send( zmq_socket_push, n, 4, ZMQ_SNDMORE)' 
+      stop 'error' 
+    endif 
+
+    if(n(kk) /= 0) then
+      rc = f77_zmq_send( zmq_socket_push, delta(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE)  ! delta(1,0,1) = delta_I   delta(1,0,2) = delta_J 
+      if (rc /=  (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
+
+      rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE)  ! delta_s2(1,0,1) = delta_I   delta_s2(1,0,2) = delta_J 
+      if (rc /=  (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
+
+      rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) 
+      if (rc /=  n(kk)*4) then 
+        print *, irp_here,  'f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
+    end if
+  end do
+
+
+  ! Last frame of the message : no ZMQ_SNDMORE
+  rc = f77_zmq_send( zmq_socket_push, task_id, 4, 0) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_send( zmq_socket_push, task_id, 4, 0)' 
+    stop 'error' 
+  endif 
+
+  deallocate(idx)
+
+! ! Activate if zmq_socket_push is a REQ 
+!   integer :: idummy 
+!   rc = f77_zmq_recv( zmq_socket_push, idummy, 4, 0) 
+!   if (rc /= 4) then 
+!     print *, irp_here, 'f77_zmq_send( zmq_socket_push, idummy, 4, 0)' 
+!     stop 'error' 
+!   endif 
+end
+
+
+
+subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) 
+  use f77_zmq 
+  implicit none 
+  BEGIN_DOC 
+! Receives from the pull socket one message sent by push_mrsc2_results :
+! i_I, J, then for kk=1,2 : n(kk) and, if n(kk) /= 0, delta(:,0:n(kk),kk),
+! delta_s2(:,0:n(kk),kk) and idx(1:n(kk),kk), and finally task_id.
+!
+! When n(kk) == 0, delta(:,:,kk), delta_s2(:,:,kk) and idx(:,kk) are left untouched.
+  END_DOC 
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull 
+  integer, intent(out)           :: i_I, J, n(2)
+  double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
+  integer, intent(out)           :: task_id 
+  integer                        :: rc , kk
+  integer,intent(inout) :: idx(N_det_non_ref,2)
+
+  ! ZMQ_SNDMORE is a send flag : the receive calls use 0 (blocking receive)
+  rc = f77_zmq_recv( zmq_socket_pull, i_I, 4, 0) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, i_I, 4, 0)' 
+    stop 'error' 
+  endif 
+
+  rc = f77_zmq_recv( zmq_socket_pull, J, 4, 0) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, J, 4, 0)' 
+    stop 'error' 
+  endif 
+
+  do kk = 1, 2
+    rc = f77_zmq_recv( zmq_socket_pull, n(kk), 4, 0) 
+    if (rc /= 4) then 
+      print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, n, 4, 0)' 
+      stop 'error' 
+    endif 
+
+    ! The three data frames are only present when n(kk) /= 0
+    if(n(kk) /= 0) then
+      rc = f77_zmq_recv( zmq_socket_pull, delta(1,0,kk), (n(kk)+1)*8*N_states, 0) 
+      if (rc /= (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta, (n(kk)+1)*8*N_states, 0)'
+        stop 'error' 
+      endif 
+
+      rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, 0) 
+      if (rc /= (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, 0)'
+        stop 'error' 
+      endif 
+
+      rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, 0) 
+      if (rc /= n(kk)*4) then 
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, 0)'
+        stop 'error' 
+      endif 
+    end if
+  end do
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, 4, 0) 
+  if (rc /= 4) then 
+    print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, task_id, 4, 0)' 
+    stop 'error' 
+  endif 
+
+
+! ! Activate if zmq_socket_pull is a REP 
+!   integer :: idummy 
+!   rc = f77_zmq_send( zmq_socket_pull, idummy, 4, 0) 
+!   if (rc /= 4) then 
+!     print *, irp_here, 'f77_zmq_send( zmq_socket_pull, idummy, 4, 0)' 
+!     stop 'error' 
+!   endif 
+end
+
+
+
+subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_)
+  use f77_zmq
+  implicit none
+  BEGIN_DOC 
+! Collects the results pushed by the MR-SC2 dressing workers and sums them
+! into delta_ii_, delta_ij_, delta_ii_s2_ and delta_ij_s2_ (zeroed on entry).
+!
+! Slot 1 of each received message is added to reference i_I, slot 2 to reference J.
+  END_DOC 
+
+  double precision,intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref) 
+  double precision,intent(inout) :: delta_ii_(N_states,N_det_ref)
+  double precision,intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) 
+  double precision,intent(inout) :: delta_ii_s2_(N_states,N_det_ref)
+
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket, new_zmq_pull_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket, zmq_socket_pull
+
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:) 
+  integer, allocatable           :: idx(:,:)
+
+  integer                        :: task_id, more 
+  integer                        :: i_I, J, n(2)
+  integer                        :: side, slot, istate, iref
+
+  delta_ii_    = 0d0
+  delta_ij_    = 0d0
+  delta_ii_s2_ = 0d0
+  delta_ij_s2_ = 0d0
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() 
+  zmq_socket_pull      = new_zmq_pull_socket() 
+
+  allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) 
+  allocate ( idx(N_det_non_ref,2) )
+
+  more = 1 
+  do
+    if (more /= 1) exit
+
+    call pull_mrsc2_results(zmq_socket_pull, i_I, J, n, idx, delta, delta_s2, task_id)
+
+    do side = 1, 2
+      ! Reference determinant receiving this slot
+      if (side == 1) then
+        iref = i_I
+      else
+        iref = J
+      endif
+
+      ! Nothing was transmitted for an empty slot
+      if (n(side) == 0) cycle
+
+      do slot = 1, n(side)
+        do istate = 1, N_states
+          delta_ij_(istate,idx(slot,side),iref)    += delta(istate,slot,side)
+          delta_ij_s2_(istate,idx(slot,side),iref) += delta_s2(istate,slot,side)
+        end do
+      end do
+
+      ! Row 0 carries the term added to delta_ii_ / delta_ii_s2_
+      do istate = 1, N_states
+        delta_ii_(istate,iref)    += delta(istate,0,side)
+        delta_ii_s2_(istate,iref) += delta_s2(istate,0,side)
+      end do
+    end do
+
+    ! zmq_delete_task updates more, which controls the loop
+    if (task_id /= 0) then 
+      call zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id,more) 
+    endif 
+
+  enddo 
+  deallocate( delta, delta_s2 )
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_pull_socket(zmq_socket_pull)
+
+end
+
+
+
+
+ BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ]
+  implicit none
+  BEGIN_DOC
+! Dressing matrices computed by the parallel 'mrsc2' job.
+!
+! For every pair (I_s, J_s) of reference determinants with J_s <= I_s whose excitation
+! degree is between 0 and 3, tasks "i_I J kk_first kk_last" are queued, each covering
+! a slice of at most KKsize entries of linked(:, i_I). Thread 0 then runs the
+! collector and the other threads run the workers.
+  END_DOC
+  
+  integer                         :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2
+  integer                         :: p1,p2,h1,h2,s1,s2, blok, I_s, J_s, nex, nzer, ntot
+!   integer, allocatable            :: linked(:,:), blokMwen(:, :), nlink(:)
+  logical                         :: ok
+  double precision                :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al, diI, hIi, hJi, delta_JI, dkI(N_states), HkI, ci_inv(N_states), dia_hla(N_states)
+  double precision                :: contrib, wall, iwall ! , searchance(N_det_ref)
+  integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
+  integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt
+  integer, external               :: get_index_in_psi_det_sorted_bit, searchDet, detCmp
+  logical, external               :: is_in_wavefunction, isInCassd, detEq
+  character*(512)                :: task 
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket 
+  
+  integer :: KKsize = 1000000
+  
+  
+  call new_parallel_job(zmq_to_qp_run_socket,'mrsc2')
+
+
+  call wall_time(iwall)
+!   allocate(linked(N_det_non_ref, N_det_ref), blokMwen(N_det_non_ref, N_det_ref), nlink(N_det_ref))
+  
+
+!   searchance = 0d0
+!   do J = 1, N_det_ref
+!     nlink(J) = 0
+!     do blok=1,cepa0_shortcut(0)
+!     do k=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1
+!       call get_excitation_degree(psi_ref(1,1,J),det_cepa0(1,1,k),degree,N_int)
+!       if(degree <= 2) then
+!         nlink(J) += 1
+!         linked(nlink(J),J) = k
+!         blokMwen(nlink(J),J) = blok
+!         searchance(J) += 1d0 + log(dfloat(cepa0_shortcut(blok+1) - cepa0_shortcut(blok)))
+!       end if
+!     end do
+!     end do
+!   end do
+
+     
+     
+!   stop
+  nzer = 0
+  ntot = 0
+  ! Pairs are queued by decreasing excitation degree : 3, 2, 1, 0
+  do nex = 3, 0, -1
+    print *, "los ",nex
+    do I_s = N_det_ref, 1, -1
+!         if(mod(I_s,1) == 0) then
+!           call wall_time(wall)
+!           wall = wall-iwall
+!           print *, I_s, "/", N_det_ref, wall * (dfloat(N_det_ref) / dfloat(I_s)), wall, wall * (dfloat(N_det_ref) / dfloat(I_s))-wall
+!         end if
+
+
+      do J_s = 1, I_s
+      
+        call get_excitation_degree(psi_ref(1,1,J_s), psi_ref(1,1,I_s), degree, N_int)
+        if(degree /= nex) cycle
+        if(nex == 3) nzer = nzer + 1
+        ntot += 1
+!           if(degree > 3) then
+!             deg += 1
+!             cycle
+!           else if(degree == -10) then
+!             KKsize = 100000
+!           else
+!             KKsize = 1000000
+!           end if
+        
+        
+        
+        ! i_I is the determinant of the pair with the smaller searchance
+        if(searchance(I_s) < searchance(J_s)) then
+          i_I = I_s
+          J = J_s
+        else
+          i_I = J_s
+          J = I_s
+        end if
+        
+        ! Nothing to queue for this pair. Also avoids dividing by nlink(i_I) = 0 below.
+        if(nlink(i_I) == 0) cycle
+        
+        KKsize = nlink(1)
+        if(nex == 0) KKsize = int(float(nlink(1)) / float(nlink(i_I)) * (float(nlink(1)) / 64d0))
+        
+        ! KKsize is the increment of the loop below : a DO increment must not be zero
+        KKsize = max(KKsize, 1)
+        
+        do kk = 1 , nlink(i_I), KKsize
+          write(task,*) I_i, J, kk, int(min(kk+KKsize-1, nlink(i_I)))
+          call add_task_to_taskserver(zmq_to_qp_run_socket,task)
+        end do
+        
+  !       do kk = 1 , nlink(i_I)
+  !         k = linked(kk,i_I)
+  !         blok = blokMwen(kk,i_I)
+  !         write(task,*) I_i, J, k, blok
+  !         call add_task_to_taskserver(zmq_to_qp_run_socket,task)
+  !         
+  !       enddo !kk
+      enddo !J
+          
+    enddo !I
+  end do ! nex
+  print *, "tasked"
+!  integer(ZMQ_PTR)               :: collector_thread
+!  external                       :: ao_bielec_integrals_in_map_collector
+!  rc = pthread_create(collector_thread, mrsc2_dressing_collector)
+  print *, nzer, ntot, float(nzer) / float(ntot)
+  provide nproc
+  ! nproc+1 threads : thread 0 collects the results, threads 1..nproc are the workers
+  !$OMP PARALLEL DEFAULT(none)  SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)  PRIVATE(i) NUM_THREADS(nproc+1)
+      i = omp_get_thread_num()
+      if (i==0) then
+        call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)
+      else
+        call mrsc2_dressing_slave_inproc(i)
+      endif
+  !$OMP END PARALLEL
+
+!  rc = pthread_join(collector_thread)
+  call end_parallel_job(zmq_to_qp_run_socket, 'mrsc2')
+    
+
+END_PROVIDER
+
+
+
diff --git a/plugins/mrcc_selected/ezfio_interface.irp.f b/plugins/mrcc_selected/ezfio_interface.irp.f
new file mode 100644
index 00000000..062af449
--- /dev/null
+++ b/plugins/mrcc_selected/ezfio_interface.irp.f
@@ -0,0 +1,61 @@
+! DO NOT MODIFY BY HAND
+! Created by $QP_ROOT/scripts/ezfio_interface/ei_handler.py
+! from file /home/scemama/quantum_package/src/mrcc_selected/EZFIO.cfg
+
+
+BEGIN_PROVIDER [ double precision, thresh_dressed_ci  ]
+  implicit none
+  BEGIN_DOC
+! Convergence threshold of the dressed CI energy,
+! read from mrcc_selected/thresh_dressed_ci in the EZFIO file
+  END_DOC
+
+  logical                        :: found
+  PROVIDE ezfio_filename
+
+  call ezfio_has_mrcc_selected_thresh_dressed_ci(found)
+  ! Abort when the value is missing from the EZFIO file
+  if (.not. found) then
+    print *, 'mrcc_selected/thresh_dressed_ci not found in EZFIO file'
+    stop 1
+  endif
+  call ezfio_get_mrcc_selected_thresh_dressed_ci(thresh_dressed_ci)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, n_it_max_dressed_ci  ]
+  implicit none
+  BEGIN_DOC
+! Largest number of dressed CI iterations allowed,
+! read from mrcc_selected/n_it_max_dressed_ci in the EZFIO file
+  END_DOC
+
+  logical                        :: found
+  PROVIDE ezfio_filename
+
+  call ezfio_has_mrcc_selected_n_it_max_dressed_ci(found)
+  ! Abort when the value is missing from the EZFIO file
+  if (.not. found) then
+    print *, 'mrcc_selected/n_it_max_dressed_ci not found in EZFIO file'
+    stop 1
+  endif
+  call ezfio_get_mrcc_selected_n_it_max_dressed_ci(n_it_max_dressed_ci)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ integer, lambda_type  ]
+  implicit none
+  BEGIN_DOC
+! Type of lambda, read from mrcc_selected/lambda_type in the EZFIO file
+  END_DOC
+
+  logical                        :: found
+  PROVIDE ezfio_filename
+
+  call ezfio_has_mrcc_selected_lambda_type(found)
+  ! Abort when the value is missing from the EZFIO file
+  if (.not. found) then
+    print *, 'mrcc_selected/lambda_type not found in EZFIO file'
+    stop 1
+  endif
+  call ezfio_get_mrcc_selected_lambda_type(lambda_type)
+
+END_PROVIDER
diff --git a/plugins/mrcc_selected/mrcc_selected.irp.f b/plugins/mrcc_selected/mrcc_selected.irp.f
new file mode 100644
index 00000000..91592e62
--- /dev/null
+++ b/plugins/mrcc_selected/mrcc_selected.irp.f
@@ -0,0 +1,19 @@
+program mrsc2sub
+  ! Driver : sets mrmode to 3, forces the wave function to be read, prints the CAS
+  ! coefficients, runs the dressed CI (run) and, if do_pt2_end is set, run_pt2.
+  implicit none
+  double precision, allocatable  :: energy(:)
+  allocate (energy(N_states))
+  
+  !mrmode : 1=mrcepa0, 2=mrsc2 add, 3=mrcc
+  mrmode = 3
+  
+  ! read_wf is changed by hand, so it is touched to notify IRPF90
+  read_wf = .True.
+  SOFT_TOUCH read_wf
+  call print_cas_coefs
+  call set_generators_bitmasks_as_holes_and_particles
+  ! energy receives the dressed CI energies from run
+  call run(N_states,energy)
+  if(do_pt2_end)then
+    call run_pt2(N_states,energy)
+  endif
+  deallocate(energy)
+end
+
diff --git a/plugins/mrcc_selected/mrcepa0_general.irp.f b/plugins/mrcc_selected/mrcepa0_general.irp.f
new file mode 100644
index 00000000..e3a2d1f5
--- /dev/null
+++ b/plugins/mrcc_selected/mrcepa0_general.irp.f
@@ -0,0 +1,245 @@
+
+
+subroutine run(N_st,energy)
+  implicit none
+  ! Runs the dressed-CI diagonalization (one pass or iterated) and returns the dressed energies.
+  integer, intent(in) :: N_st 
+  double precision, intent(out) :: energy(N_st) 
+  ! N_st is the length of energy; energy receives ci_energy_dressed on exit.
+  integer :: i,j
+
+  double precision :: E_new, E_old, delta_e
+  integer :: iteration
+  double precision :: E_past(4)
+  ! NOTE(review): E_past is declared but never used in this routine.
+  integer :: n_it_mrcc_max
+  double precision :: thresh_mrcc
+  double precision, allocatable :: lambda(:)
+  allocate (lambda(N_states))
+  ! NOTE(review): lambda is sized with N_states (not N_st) and is never explicitly deallocated here.
+  
+  thresh_mrcc = thresh_dressed_ci
+  n_it_mrcc_max = n_it_max_dressed_ci
+  ! Single-pass mode: copy the dressed eigenvectors into psi_coef without iterating.
+  if(n_it_mrcc_max == 1) then
+    do j=1,N_states_diag
+      do i=1,N_det
+        psi_coef(i,j) = CI_eigenvectors_dressed(i,j)
+      enddo
+    enddo
+    SOFT_TOUCH psi_coef ci_energy_dressed
+    call write_double(6,ci_energy_dressed(1),"Final MRCC energy")
+    call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
+    call save_wavefunction
+    energy(:) = ci_energy_dressed(:)  ! NOTE(review): assumes ci_energy_dressed has exactly N_st elements - confirm
+  else
+    E_new = 0.d0
+    delta_E = 1.d0
+    iteration = 0
+    lambda = 1.d0
+    do while (delta_E > thresh_mrcc)  ! loop until the summed energy changes by at most thresh_mrcc
+      iteration += 1
+      print *,  '===========================' 
+      print *,  'MRCEPA0 Iteration', iteration
+      print *,  '===========================' 
+      print *,  ''
+      E_old = sum(ci_energy_dressed)
+      call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy")
+      call diagonalize_ci_dressed(lambda)
+      E_new = sum(ci_energy_dressed)
+      delta_E = dabs(E_new - E_old)
+      call save_wavefunction
+      call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
+      if (iteration >= n_it_mrcc_max) then  ! hard cap on the number of iterations
+        exit
+      endif
+    enddo
+    call write_double(6,ci_energy_dressed(1),"Final MRCEPA0 energy")
+    energy(:) = ci_energy_dressed(:)  ! NOTE(review): same size assumption as in the single-pass branch
+  endif
+end
+
+
+subroutine print_cas_coefs
+  implicit none
+  ! Prints, for each of the N_det_cas determinants, its coefficients in all N_states states and its debug_det dump.
+  integer :: i,j
+  print *,  'CAS'
+  print *,  '==='
+  do i=1,N_det_cas
+    print *,  (psi_cas_coef(i,j), j=1,N_states)
+    call debug_det(psi_cas(1,1,i),N_int)
+  enddo
+  call write_double(6,ci_energy(1),"Initial CI energy")
+  ! Only the CI energy of state 1 is written.
+end
+
+
+
+
+subroutine run_pt2_old(N_st,energy) 
+  implicit none 
+  integer :: i,j,k 
+  integer, intent(in)          :: N_st 
+  double precision, intent(in) :: energy(N_st) 
+  double precision :: pt2_redundant(N_st), pt2(N_st)
+  double precision :: norm_pert(N_st),H_pert_diag(N_st)
+  ! Two-pass PT2: pass 1 over non-reference determinants only ("redundant"), pass 2 over reference + non-reference.
+  pt2_redundant = 0.d0 
+  pt2 = 0d0
+  !if(lambda_mrcc_pt2(0)  == 0) return
+  ! NOTE(review): the main program in mrcc_selected.irp.f calls run_pt2, not this routine - confirm it is still needed.
+  print*,'Last iteration only to compute the PT2' 
+  
+  print * ,'Computing the redundant PT2 contribution'
+
+  if (mrmode == 1) then
+    ! mrmode 1: keep only the non-reference determinants indexed by lambda_mrcc_kept(1:lambda_mrcc_kept(0)).
+    N_det_generators = lambda_mrcc_kept(0) 
+    N_det_selectors = lambda_mrcc_kept(0) 
+
+    do i=1,N_det_generators
+      j = lambda_mrcc_kept(i)
+      do k=1,N_int
+        psi_det_generators(k,1,i) = psi_non_ref(k,1,j)
+        psi_det_generators(k,2,i) = psi_non_ref(k,2,j)
+        psi_selectors(k,1,i) = psi_non_ref(k,1,j)
+        psi_selectors(k,2,i) = psi_non_ref(k,2,j)
+      enddo
+      do k=1,N_st
+        psi_coef_generators(i,k) = psi_non_ref_coef(j,k)
+        psi_selectors_coef(i,k) = psi_non_ref_coef(j,k)
+      enddo
+    enddo
+
+  else
+    ! Other modes: use every non-reference determinant, in order.
+    N_det_generators = N_det_non_ref
+    N_det_selectors = N_det_non_ref
+
+    do i=1,N_det_generators
+      j = i
+      do k=1,N_int
+        psi_det_generators(k,1,i) = psi_non_ref(k,1,j)
+        psi_det_generators(k,2,i) = psi_non_ref(k,2,j)
+        psi_selectors(k,1,i) = psi_non_ref(k,1,j)
+        psi_selectors(k,2,i) = psi_non_ref(k,2,j)
+      enddo
+      do k=1,N_st
+        psi_coef_generators(i,k) = psi_non_ref_coef(j,k)
+        psi_selectors_coef(i,k) = psi_non_ref_coef(j,k)
+      enddo
+    enddo
+
+  endif
+  ! The generator/selector arrays were filled by hand above; the SOFT_TOUCH lines notify IRPF90.
+  SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed
+  SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized
+
+  call H_apply_mrcepa_PT2(pt2_redundant, norm_pert, H_pert_diag,  N_st) 
+  
+  print * ,'Computing the remaining contribution'
+  ! Pass 2: thresholds are raised to at least their *_pt2 counterparts.
+  threshold_selectors = max(threshold_selectors,threshold_selectors_pt2)
+  threshold_generators = max(threshold_generators,threshold_generators_pt2)
+
+  N_det_generators = N_det_non_ref + N_det_ref
+  N_det_selectors = N_det_non_ref + N_det_ref
+  ! The reference determinants fill the first N_det_ref slots ...
+  psi_det_generators(:,:,:N_det_ref) = psi_ref(:,:,:N_det_ref)
+  psi_selectors(:,:,:N_det_ref) = psi_ref(:,:,:N_det_ref)
+  psi_coef_generators(:N_det_ref,:) = psi_ref_coef(:N_det_ref,:)
+  psi_selectors_coef(:N_det_ref,:) = psi_ref_coef(:N_det_ref,:)
+  ! ... and the non-reference determinants follow them.
+  do i=N_det_ref+1,N_det_generators
+    j = i-N_det_ref
+    do k=1,N_int
+      psi_det_generators(k,1,i) = psi_non_ref(k,1,j)
+      psi_det_generators(k,2,i) = psi_non_ref(k,2,j)
+      psi_selectors(k,1,i) = psi_non_ref(k,1,j)
+      psi_selectors(k,2,i) = psi_non_ref(k,2,j)
+    enddo
+    do k=1,N_st
+      psi_coef_generators(i,k) = psi_non_ref_coef(j,k)
+      psi_selectors_coef(i,k) = psi_non_ref_coef(j,k)
+    enddo
+  enddo
+  
+  SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed
+  SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized
+
+  call H_apply_mrcepa_PT2(pt2, norm_pert, H_pert_diag,  N_st) 
+ 
+  ! The reported PT2 is the pass-2 value minus the pass-1 (redundant) value.
+  print *, "Redundant PT2 :",pt2_redundant
+  print *, "Full      PT2 :",pt2
+  print *, lambda_mrcc_kept(0), N_det, N_det_ref, psi_coef(1,1), psi_ref_coef(1,1)
+  pt2 = pt2 - pt2_redundant
+  
+  print *,  'Final step' 
+  print *,  'N_det    = ', N_det 
+  print *,  'N_states = ', N_states 
+  print *,  'PT2      = ', pt2 
+  print *,  'E        = ', energy 
+  print *,  'E+PT2    = ', energy+pt2 
+  print *,  '-----' 
+ 
+  ! Only the state-1 value of E+PT2 is stored in the EZFIO file.
+  call ezfio_set_mrcepa0_energy_pt2(energy(1)+pt2(1))
+
+end 
+
+subroutine run_pt2(N_st,energy) 
+  implicit none 
+  integer :: i,j,k 
+  integer, intent(in)          :: N_st 
+  double precision, intent(in) :: energy(N_st) 
+  double precision :: pt2(N_st)
+  double precision :: norm_pert(N_st),H_pert_diag(N_st)
+  ! Computes a PT2 correction with psi_ref determinants as generators, prints it and stores E+PT2 of state 1.
+  pt2 = 0d0
+  !if(lambda_mrcc_pt2(0)  == 0) return
+  
+  print*,'Last iteration only to compute the PT2' 
+  ! Generators: the first N_det_cas determinants of psi_ref.
+  N_det_generators = N_det_cas
+  N_det_selectors = N_det_non_ref
+  ! NOTE(review): N_det_selectors is N_det_non_ref, yet the selector loop below fills N_det entries - confirm the intended count.
+  do i=1,N_det_generators
+    do k=1,N_int
+      psi_det_generators(k,1,i) = psi_ref(k,1,i)
+      psi_det_generators(k,2,i) = psi_ref(k,2,i)
+    enddo
+    do k=1,N_st
+      psi_coef_generators(i,k) = psi_ref_coef(i,k)
+    enddo
+  enddo
+  do i=1,N_det
+    do k=1,N_int
+      psi_selectors(k,1,i) = psi_det_sorted(k,1,i)
+      psi_selectors(k,2,i) = psi_det_sorted(k,2,i)
+    enddo
+    do k=1,N_st
+      psi_selectors_coef(i,k) = psi_coef_sorted(i,k)
+    enddo
+  enddo
+  ! The generator/selector arrays were filled by hand above; the SOFT_TOUCH lines notify IRPF90.
+  SOFT_TOUCH N_det_selectors psi_selectors_coef psi_selectors N_det_generators psi_det_generators psi_coef_generators ci_eigenvectors_dressed ci_eigenvectors_s2_dressed ci_electronic_energy_dressed
+  SOFT_TOUCH psi_ref_coef_diagonalized psi_ref_energy_diagonalized
+
+  call H_apply_mrcepa_PT2(pt2, norm_pert, H_pert_diag,  N_st) 
+  
+!  call ezfio_set_full_ci_energy_pt2(energy+pt2)
+
+  print *,  'Final step' 
+  print *,  'N_det    = ', N_det 
+  print *,  'N_states = ', N_states 
+  print *,  'PT2      = ', pt2 
+  print *,  'E        = ', energy 
+  print *,  'E+PT2    = ', energy+pt2 
+  print *,  '-----' 
+  ! Only the state-1 value of E+PT2 is stored in the EZFIO file.
+  call ezfio_set_mrcepa0_energy_pt2(energy(1)+pt2(1))
+
+end 
+
diff --git a/plugins/mrcepa0/.gitignore b/plugins/mrcepa0/.gitignore
deleted file mode 100644
index 7d9ee55d..00000000
--- a/plugins/mrcepa0/.gitignore
+++ /dev/null
@@ -1,36 +0,0 @@
-# Automatically created by $QP_ROOT/scripts/module/module_handler.py 
-.ninja_deps
-.ninja_log
-AO_Basis
-Bitmask
-Davidson
-Determinants
-Electrons
-Ezfio_files
-Generators_full
-Hartree_Fock
-IRPF90_man
-IRPF90_temp
-Integrals_Bielec
-Integrals_Monoelec
-MOGuess
-MO_Basis
-MRCC_Utils
-Makefile
-Makefile.depend
-Nuclei
-Perturbation
-Properties
-Pseudo
-Psiref_CAS
-Psiref_Utils
-Selectors_full
-Utils
-ZMQ
-ezfio_interface.irp.f
-irpf90.make
-irpf90_entities
-mrcc
-mrcepa0
-mrsc2
-tags
\ No newline at end of file
diff --git a/plugins/mrcepa0/dressing.irp.f b/plugins/mrcepa0/dressing.irp.f
index 9f041cd3..c772e2aa 100644
--- a/plugins/mrcepa0/dressing.irp.f
+++ b/plugins/mrcepa0/dressing.irp.f
@@ -4,6 +4,8 @@ use bitmasks
 
  BEGIN_PROVIDER [ double precision, delta_ij_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii_mrcc, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_mrcc, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_mrcc, (N_states, N_det_ref) ]
   use bitmasks
   implicit none
   integer :: gen, h, p, n, t, i, h1, h2, p1, p2, s1, s2, iproc
@@ -14,11 +16,13 @@ use bitmasks
   
   delta_ij_mrcc = 0d0
   delta_ii_mrcc = 0d0
-  print *, "Dij", dij(1,1,1)
+  delta_ij_s2_mrcc = 0d0
+  delta_ii_s2_mrcc = 0d0
+  PROVIDE dij
   provide hh_shortcut psi_det_size! lambda_mrcc
   !$OMP PARALLEL DO default(none)  schedule(dynamic) &
   !$OMP shared(psi_det_generators, N_det_generators, hh_exists, pp_exists, N_int, hh_shortcut) &
-  !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc) &
+  !$OMP shared(N_det_non_ref, N_det_ref, delta_ii_mrcc, delta_ij_mrcc, delta_ii_s2_mrcc, delta_ij_s2_mrcc) &
   !$OMP private(h, n, mask, omask, buf, ok, iproc)
   do gen= 1, N_det_generators
     allocate(buf(N_int, 2, N_det_non_ref))
@@ -37,7 +41,9 @@ use bitmasks
       end do
       n = n - 1
 
-      if(n /= 0) call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc,gen,n,buf,N_int,omask)
+      if(n /= 0) then
+        call mrcc_part_dress(delta_ij_mrcc, delta_ii_mrcc, delta_ij_s2_mrcc, delta_ii_s2_mrcc, gen,n,buf,N_int,omask)
+      endif
 
     end do
     deallocate(buf)
@@ -52,13 +58,15 @@ END_PROVIDER
 ! end subroutine
 
 
-subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffer,Nint,key_mask)
+subroutine mrcc_part_dress(delta_ij_, delta_ii_,delta_ij_s2_, delta_ii_s2_,i_generator,n_selected,det_buffer,Nint,key_mask)
  use bitmasks
  implicit none
 
   integer, intent(in)            :: i_generator,n_selected, Nint
   double precision, intent(inout) :: delta_ij_(N_states,N_det_non_ref,N_det_ref)
   double precision, intent(inout) :: delta_ii_(N_states,N_det_ref)
+  double precision, intent(inout) :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref)
+  double precision, intent(inout) :: delta_ii_s2_(N_states,N_det_ref)
 
   integer(bit_kind), intent(in)  :: det_buffer(Nint,2,n_selected)
   integer                        :: i,j,k,l,m
@@ -68,8 +76,8 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   integer(bit_kind),allocatable  :: tq(:,:,:)
   integer                        :: N_tq, c_ref ,degree
 
-  double precision               :: hIk, hla, hIl, dIk(N_states), dka(N_states), dIa(N_states)
-  double precision, allocatable  :: dIa_hla(:,:)
+  double precision               :: hIk, hla, hIl, sla, dIk(N_states), dka(N_states), dIa(N_states)
+  double precision, allocatable  :: dIa_hla(:,:), dIa_sla(:,:)
   double precision               :: haj, phase, phase2
   double precision               :: f(N_states), ci_inv(N_states)
   integer                        :: exc(0:2,2,2)
@@ -82,7 +90,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   integer(bit_kind),intent(in)   :: key_mask(Nint, 2)
   integer,allocatable            :: idx_miniList(:)
   integer                        :: N_miniList, ni, leng
-  double precision, allocatable  :: hij_cache(:)
+  double precision, allocatable  :: hij_cache(:), sij_cache(:)
   
   integer(bit_kind), allocatable :: microlist(:,:,:), microlist_zero(:,:,:)
   integer, allocatable           :: idx_microlist(:), N_microlist(:), ptr_microlist(:), idx_microlist_zero(:)
@@ -92,7 +100,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   
 
   leng = max(N_det_generators, N_det_non_ref)
-  allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref))
+  allocate(miniList(Nint, 2, leng), tq(Nint,2,n_selected), idx_minilist(leng), hij_cache(N_det_non_ref), sij_cache(N_det_non_ref))
   allocate(idx_alpha(0:psi_det_size), degree_alpha(psi_det_size))
   !create_minilist_find_previous(key_mask, fullList, miniList, N_fullList, N_miniList, fullMatch, Nint)
   call create_minilist_find_previous(key_mask, psi_det_generators, miniList, i_generator-1, N_miniList, fullMatch, Nint)
@@ -117,7 +125,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
   
   deallocate(microlist, idx_microlist)
   
-  allocate (dIa_hla(N_states,N_det_non_ref))
+  allocate (dIa_hla(N_states,N_det_non_ref), dIa_sla(N_states,N_det_non_ref))
   
   ! |I>
   
@@ -185,6 +193,7 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
     do l_sd=1,idx_alpha(0)
       k_sd = idx_alpha(l_sd)
       call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hij_cache(k_sd))
+      call get_s2(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,sij_cache(k_sd))
     enddo
     ! |I>
     do i_I=1,N_det_ref
@@ -282,31 +291,36 @@ subroutine mrcc_part_dress(delta_ij_, delta_ii_,i_generator,n_selected,det_buffe
       do l_sd=1,idx_alpha(0)
         k_sd = idx_alpha(l_sd)
         hla = hij_cache(k_sd)
+        sla = sij_cache(k_sd)
 !        call i_h_j(tq(1,1,i_alpha),psi_non_ref(1,1,idx_alpha(l_sd)),Nint,hla)
         do i_state=1,N_states
           dIa_hla(i_state,k_sd) = dIa(i_state) * hla
+          dIa_sla(i_state,k_sd) = dIa(i_state) * sla
         enddo
       enddo
       call omp_set_lock( psi_ref_lock(i_I) )
       do i_state=1,N_states
-        if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5)then
+        if(dabs(psi_ref_coef(i_I,i_state)).ge.1.d-3)then
           do l_sd=1,idx_alpha(0)
             k_sd = idx_alpha(l_sd)
             delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + dIa_hla(i_state,k_sd)
             delta_ii_(i_state,i_I) = delta_ii_(i_state,i_I) - dIa_hla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
+            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + dIa_sla(i_state,k_sd)
+            delta_ii_s2_(i_state,i_I) = delta_ii_s2_(i_state,i_I) - dIa_sla(i_state,k_sd) * ci_inv(i_state) * psi_non_ref_coef_transp(i_state,k_sd)
           enddo
         else
           delta_ii_(i_state,i_I)  = 0.d0
           do l_sd=1,idx_alpha(0)
             k_sd = idx_alpha(l_sd)
             delta_ij_(i_state,k_sd,i_I) = delta_ij_(i_state,k_sd,i_I) + 0.5d0*dIa_hla(i_state,k_sd)
+            delta_ij_s2_(i_state,k_sd,i_I) = delta_ij_s2_(i_state,k_sd,i_I) + 0.5d0*dIa_sla(i_state,k_sd)
           enddo
         endif
       enddo
       call omp_unset_lock( psi_ref_lock(i_I) )
     enddo
   enddo
-  deallocate (dIa_hla,hij_cache)
+  deallocate (dIa_hla,dIa_sla,hij_cache,sij_cache)
   deallocate(miniList, idx_miniList)
 end
 
@@ -315,6 +329,8 @@ end
 
  BEGIN_PROVIDER [ double precision, delta_ij, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii, (N_states, N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2, (N_states, N_det_ref) ]
   use bitmasks
   implicit none
   integer                        :: i, j, i_state
@@ -325,13 +341,36 @@ end
     do i = 1, N_det_ref
       do i_state = 1, N_states
         delta_ii(i_state,i)= delta_ii_mrcc(i_state,i)
+        delta_ii_s2(i_state,i)= delta_ii_s2_mrcc(i_state,i)
       enddo
       do j = 1, N_det_non_ref
         do i_state = 1, N_states
           delta_ij(i_state,j,i) = delta_ij_mrcc(i_state,j,i)
+          delta_ij_s2(i_state,j,i) = delta_ij_s2_mrcc(i_state,j,i)
         enddo
       end do
     end do
+
+    ! =-=-= BEGIN STATE AVERAGE
+!    do i = 1, N_det_ref
+!      delta_ii(:,i)= delta_ii_mrcc(1,i)
+!      delta_ii_s2(:,i)= delta_ii_s2_mrcc(1,i)
+!      do i_state = 2, N_states
+!        delta_ii(:,i) += delta_ii_mrcc(i_state,i)
+!        delta_ii_s2(:,i) += delta_ii_s2_mrcc(i_state,i)
+!      enddo
+!      do j = 1, N_det_non_ref
+!        delta_ij(:,j,i) = delta_ij_mrcc(1,j,i)
+!        delta_ij_s2(:,j,i) = delta_ij_s2_mrcc(1,j,i)
+!        do i_state = 2, N_states
+!          delta_ij(:,j,i) += delta_ij_mrcc(i_state,j,i)
+!          delta_ij_s2(:,j,i) += delta_ij_s2_mrcc(i_state,j,i)
+!        enddo
+!      end do
+!    end do
+!    delta_ij = delta_ij * (1.d0/dble(N_states))
+!    delta_ii = delta_ii * (1.d0/dble(N_states))
+    ! =-=-= END STATE AVERAGE
     !
     !       do i = 1, N_det_ref
     !         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state) - delta_sub_ii(i,i_state)
@@ -343,10 +382,12 @@ end
     do i = 1, N_det_ref
       do i_state = 1, N_states
         delta_ii(i_state,i)= delta_ii_old(i_state,i)
+        delta_ii_s2(i_state,i)= delta_ii_s2_old(i_state,i)
       enddo
       do j = 1, N_det_non_ref
         do i_state = 1, N_states
           delta_ij(i_state,j,i) = delta_ij_old(i_state,j,i)
+          delta_ij_s2(i_state,j,i) = delta_ij_s2_old(i_state,j,i)
         enddo
       end do
     end do
@@ -354,10 +395,12 @@ end
     do i = 1, N_det_ref
       do i_state = 1, N_states
         delta_ii(i_state,i)= delta_mrcepa0_ii(i,i_state)
+        delta_ii_s2(i_state,i)= delta_mrcepa0_ii_s2(i,i_state)
       enddo
       do j = 1, N_det_non_ref
         do i_state = 1, N_states
           delta_ij(i_state,j,i) = delta_mrcepa0_ij(i,j,i_state)
+          delta_ij_s2(i_state,j,i) = delta_mrcepa0_ij_s2(i,j,i_state)
         enddo
       end do
     end do
@@ -547,28 +590,32 @@ END_PROVIDER
  
  
  BEGIN_PROVIDER [ double precision, delta_cas, (N_det_ref, N_det_ref, N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_cas_s2, (N_det_ref, N_det_ref, N_states) ]
   use bitmasks
   implicit none
   integer :: i,j,k
-  double precision :: Hjk, Hki, Hij
+  double precision :: Sjk,Hjk, Hki, Hij
   !double precision, external :: get_dij
   integer i_state, degree
    
   provide lambda_mrcc dIj
   do i_state = 1, N_states
-    !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,N_det_ref,dij)
+    !$OMP PARALLEL DO default(none) schedule(dynamic) private(j,k,Sjk,Hjk,Hki,degree) shared(lambda_mrcc,i_state, N_det_non_ref,psi_ref, psi_non_ref,N_int,delta_cas,delta_cas_s2,N_det_ref,dij)
     do i=1,N_det_ref
       do j=1,i
         call get_excitation_degree(psi_ref(1,1,i), psi_ref(1,1,j), degree, N_int)
         delta_cas(i,j,i_state) = 0d0
+        delta_cas_s2(i,j,i_state) = 0d0
         do k=1,N_det_non_ref
 
           call i_h_j(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Hjk)
+          call get_s2(psi_ref(1,1,j), psi_non_ref(1,1,k),N_int,Sjk)
           
           delta_cas(i,j,i_state) += Hjk * dij(i, k, i_state) ! * Hki * lambda_mrcc(i_state, k)
-          !print *, Hjk * get_dij(psi_ref(1,1,i), psi_non_ref(1,1,k), N_int), Hki * get_dij(psi_ref(1,1,j), psi_non_ref(1,1,k), N_int)
+          delta_cas_s2(i,j,i_state) += Sjk * dij(i, k, i_state) ! * Ski * lambda_mrcc(i_state, k)
         end do
         delta_cas(j,i,i_state) = delta_cas(i,j,i_state)
+        delta_cas_s2(j,i,i_state) = delta_cas_s2(i,j,i_state)
       end do
     end do
     !$OMP END PARALLEL DO
@@ -649,6 +696,8 @@ end function
 
  BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij, (N_det_ref,N_det_non_ref,N_states) ]
 &BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii, (N_det_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ij_s2, (N_det_ref,N_det_non_ref,N_states) ]
+&BEGIN_PROVIDER [ double precision, delta_mrcepa0_ii_s2, (N_det_ref,N_states) ]
   use bitmasks
   implicit none
   
@@ -656,7 +705,7 @@ end function
   integer                         :: p1,p2,h1,h2,s1,s2, p1_,p2_,h1_,h2_,s1_,s2_, sortRefIdx(N_det_ref)
   logical                         :: ok
   double precision                :: phase_iI, phase_Ik, phase_Jl, phase_IJ, phase_al, diI, hIi, hJi, delta_JI, dkI(1), HkI, ci_inv(1), dia_hla(1)
-  double precision                :: contrib, contrib2,  HIIi, HJk, wall
+  double precision                :: contrib, contrib2,  contrib_s2, contrib2_s2,  HIIi, HJk, wall
   integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
   integer(bit_kind)               :: det_tmp(N_int, 2), made_hole(N_int,2), made_particle(N_int,2), myActive(N_int,2)
   integer(bit_kind),allocatable   :: sortRef(:,:,:)
@@ -681,14 +730,16 @@ end function
   ! To provide everything
   contrib = dij(1, 1, 1)
   
-  do i_state = 1, N_states
-    delta_mrcepa0_ii(:,:) = 0d0
-    delta_mrcepa0_ij(:,:,:) = 0d0
+  delta_mrcepa0_ii(:,:) = 0d0
+  delta_mrcepa0_ij(:,:,:) = 0d0
+  delta_mrcepa0_ii_s2(:,:) = 0d0
+  delta_mrcepa0_ij_s2(:,:,:) = 0d0
 
-    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii)       &
-    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2)       &
+  do i_state = 1, N_states
+    !$OMP PARALLEL DO default(none) schedule(dynamic) shared(delta_mrcepa0_ij, delta_mrcepa0_ii, delta_mrcepa0_ij_s2, delta_mrcepa0_ii_s2)       &
+    !$OMP private(m,i,II,J,k,degree,myActive,made_hole,made_particle,hjk,contrib,contrib2,contrib_s2,contrib2_s2) &
     !$OMP shared(active_sorb, psi_non_ref, psi_non_ref_coef, psi_ref, psi_ref_coef, cepa0_shortcut, det_cepa0_active)     &
-    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas) &
+    !$OMP shared(N_det_ref, N_det_non_ref,N_int,det_cepa0_idx,lambda_mrcc,det_ref_active, delta_cas, delta_cas_s2) &
     !$OMP shared(notf,i_state, sortRef, sortRefIdx, dij)
     do blok=1,cepa0_shortcut(0)
     do i=cepa0_shortcut(blok), cepa0_shortcut(blok+1)-1
@@ -731,16 +782,24 @@ end function
 
 !          call i_h_j(psi_non_ref(1,1,det_cepa0_idx(k)),psi_ref(1,1,J),N_int,HJk)
           contrib = delta_cas(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
+          contrib_s2 = delta_cas_s2(II, J, i_state) * dij(J, det_cepa0_idx(k), i_state)
           
-          if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
+          if(dabs(psi_ref_coef(J,i_state)).ge.1.d-3) then
             contrib2 = contrib / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
+            contrib2_s2 = contrib_s2 / psi_ref_coef(J, i_state) * psi_non_ref_coef(det_cepa0_idx(i),i_state)
             !$OMP ATOMIC
             delta_mrcepa0_ii(J,i_state) -= contrib2 
+            ! ATOMIC protects only the single statement that follows it, so each S2 update needs its own.
+            !$OMP ATOMIC
+            delta_mrcepa0_ii_s2(J,i_state) -= contrib2_s2 
           else
             contrib = contrib * 0.5d0
+            contrib_s2 = contrib_s2 * 0.5d0
           end if
           !$OMP ATOMIC
           delta_mrcepa0_ij(J, det_cepa0_idx(i), i_state) += contrib
+          !$OMP ATOMIC
+          delta_mrcepa0_ij_s2(J, det_cepa0_idx(i), i_state) += contrib_s2
 
         end do kloop
       end do
@@ -751,7 +807,7 @@ end function
   deallocate(idx_sorted_bit)
   call wall_time(wall)
   print *, "cepa0", wall, notf
-  !stop
+
 END_PROVIDER
 
 
@@ -839,7 +895,7 @@ END_PROVIDER
             call apply_excitation(psi_non_ref(1,1,i),exc_Ik,det_tmp,ok,N_int)
             if(ok) cycle
             contrib = delta_IJk * HIl * lambda_mrcc(i_state,l)   
-            if(dabs(psi_ref_coef(II,i_state)).ge.5.d-5) then
+            if(dabs(psi_ref_coef(II,i_state)).ge.1.d-3) then
               contrib2 = contrib / psi_ref_coef(II, i_state) * psi_non_ref_coef(l,i_state)
               !$OMP ATOMIC
               delta_sub_ii(II,i_state) -= contrib2
@@ -870,12 +926,14 @@ subroutine set_det_bit(det, p, s)
 end subroutine
 
 
-BEGIN_PROVIDER [ double precision, h_, (N_det_ref,N_det_non_ref) ]
+ BEGIN_PROVIDER [ double precision, h_cache, (N_det_ref,N_det_non_ref) ]
+&BEGIN_PROVIDER [ double precision, s2_cache, (N_det_ref,N_det_non_ref) ]
   implicit none
   integer :: i,j
   do i=1,N_det_ref
   do j=1,N_det_non_ref
-    call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_(i,j))
+    call i_h_j(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, h_cache(i,j))
+    call get_s2(psi_ref(1,1,i), psi_non_ref(1,1,j), N_int, s2_cache(i,j))
   end do
   end do
 END_PROVIDER
diff --git a/plugins/mrcepa0/dressing_slave.irp.f b/plugins/mrcepa0/dressing_slave.irp.f
index f1d6f029..9e9fa65a 100644
--- a/plugins/mrcepa0/dressing_slave.irp.f
+++ b/plugins/mrcepa0/dressing_slave.irp.f
@@ -37,7 +37,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   integer(ZMQ_PTR), external     :: new_zmq_push_socket 
   integer(ZMQ_PTR)               :: zmq_socket_push 
 
-  double precision, allocatable  :: delta(:,:,:) 
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:)
   
 
 
@@ -47,8 +47,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   logical                         :: ok
   double precision                :: phase_iI, phase_Ik, phase_Jl, phase_Ji, phase_al
   double precision                :: diI, hIi, hJi, delta_JI, dkI, HkI, ci_inv(N_states), cj_inv(N_states)
-  double precision                :: contrib, wall, iwall
-  double precision, allocatable   :: dleat(:,:,:)
+  double precision                :: contrib, contrib_s2, wall, iwall
+  double precision, allocatable   :: dleat(:,:,:), dleat_s2(:,:,:)
   integer, dimension(0:2,2,2)     :: exc_iI, exc_Ik, exc_IJ
   integer(bit_kind)               :: det_tmp(N_int, 2), det_tmp2(N_int, 2), inac, virt
   integer, external               :: get_index_in_psi_det_sorted_bit, searchDet, detCmp
@@ -63,6 +63,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
   call connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread)
 
   allocate (dleat(N_states, N_det_non_ref, 2), delta(N_states,0:N_det_non_ref, 2))
+  allocate (dleat_s2(N_states, N_det_non_ref, 2), delta_s2(N_states,0:N_det_non_ref, 2))
   allocate(komon(0:N_det_non_ref))
 
   do 
@@ -74,10 +75,14 @@ subroutine mrsc2_dressing_slave(thread,iproc)
       cj_inv(i_state) = 1.d0 / psi_ref_coef(J,i_state)
     end do
     !delta = 0.d0
+    !delta_s2 = 0.d0
     n = 0
     delta(:,0,:) = 0d0
     delta(:,:nlink(J),1) = 0d0
     delta(:,:nlink(i_I),2) = 0d0
+    delta_s2(:,0,:) = 0d0
+    delta_s2(:,:nlink(J),1) = 0d0
+    delta_s2(:,:nlink(i_I),2) = 0d0
     komon(0) = 0
     komoned = .false.
     
@@ -121,8 +126,8 @@ subroutine mrsc2_dressing_slave(thread,iproc)
           end if
           i = det_cepa0_idx(linked(m, i_I))
           
-          if(h_(J,i) == 0.d0) cycle
-          if(h_(i_I,i) == 0.d0) cycle
+          if(h_cache(J,i) == 0.d0) cycle
+          if(h_cache(i_I,i) == 0.d0) cycle
           
           !ok = .false.
           !do i_state=1, N_states
@@ -144,10 +149,13 @@ subroutine mrsc2_dressing_slave(thread,iproc)
 !           if(I_i == J) phase_Ii = phase_Ji
           
           do i_state = 1,N_states
-            dkI = h_(J,i) * dij(i_I, i, i_state)!get_dij(psi_ref(1,1,i_I), psi_non_ref(1,1,i), N_int)
-            !dkI = h_(J,i) * h_(i_I,i) * lambda_mrcc(i_state, i)
+            dkI = h_cache(J,i) * dij(i_I, i, i_state)
             dleat(i_state, kn, 1) = dkI
             dleat(i_state, kn, 2) = dkI
+
+            dkI = s2_cache(J,i) * dij(i_I, i, i_state)
+            dleat_s2(i_state, kn, 1) = dkI
+            dleat_s2(i_state, kn, 2) = dkI
           end do
 
         end do
@@ -173,26 +181,32 @@ subroutine mrsc2_dressing_slave(thread,iproc)
           !if(lambda_mrcc(i_state, i) == 0d0) cycle
           
 
-          !contrib = h_(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al
+          !contrib = h_cache(i_I,k) * lambda_mrcc(i_state, k) * dleat(i_state, m, 2)! * phase_al
           contrib =  dij(i_I, k, i_state) * dleat(i_state, m, 2)
+          contrib_s2 =  dij(i_I, k, i_state) * dleat_s2(i_state, m, 2)
           delta(i_state,ll,1) += contrib
+          delta_s2(i_state,ll,1) += contrib_s2
           if(dabs(psi_ref_coef(i_I,i_state)).ge.5.d-5) then
             delta(i_state,0,1) -= contrib * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
+            delta_s2(i_state,0,1) -= contrib_s2 * ci_inv(i_state) * psi_non_ref_coef(l,i_state)
           endif
           
           if(I_i == J) cycle
-          !contrib = h_(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al
+          !contrib = h_cache(J,l) * lambda_mrcc(i_state, l) * dleat(i_state, m, 1)! * phase_al
           contrib =  dij(J, l, i_state) * dleat(i_state, m, 1)
+          contrib_s2 =  dij(J, l, i_state) * dleat_s2(i_state, m, 1)
           delta(i_state,kk,2) += contrib
+          delta_s2(i_state,kk,2) += contrib_s2
           if(dabs(psi_ref_coef(J,i_state)).ge.5.d-5) then
             delta(i_state,0,2) -= contrib * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
+            delta_s2(i_state,0,2) -= contrib_s2 * cj_inv(i_state) * psi_non_ref_coef(k,i_state)
           end if
         enddo !i_state
       end do ! while
     end do ! kk
 
       
-      call push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) 
+      call push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
       call task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id)
  
 !     end if
@@ -208,7 +222,7 @@ subroutine mrsc2_dressing_slave(thread,iproc)
 end
 
 
-subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id) 
+subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, delta_s2, task_id) 
   use f77_zmq 
   implicit none 
   BEGIN_DOC 
@@ -218,6 +232,7 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id)
   integer, intent(in)            :: i_I, J
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_push 
   double precision,intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision,intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
   integer, intent(in)            :: task_id 
   integer                        :: rc , i_state,  i, kk, li
   integer,allocatable            :: idx(:,:)
@@ -278,6 +293,12 @@ subroutine push_mrsc2_results(zmq_socket_push, I_i, J, delta, task_id)
         print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
         stop 'error' 
       endif 
+
+      rc = f77_zmq_send( zmq_socket_push, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE)  ! delta_s2(1,0,1) = delta_I   delta_s2(1,0,2) = delta_J 
+      if (rc /=  (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_send( zmq_socket_push, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
       
       rc = f77_zmq_send( zmq_socket_push, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) 
       if (rc /=  n(kk)*4) then 
@@ -305,7 +326,7 @@ end
 
 
 
-subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id) 
+subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id) 
   use f77_zmq 
   implicit none 
   BEGIN_DOC 
@@ -315,6 +336,7 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
   integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull 
   integer, intent(out)           :: i_I, J, n(2)
   double precision, intent(inout) :: delta(N_states, 0:N_det_non_ref, 2)
+  double precision, intent(inout) :: delta_s2(N_states, 0:N_det_non_ref, 2)
   integer, intent(out)           :: task_id 
   integer                        :: rc , i, kk
   integer,intent(inout) :: idx(N_det_non_ref,2)
@@ -346,9 +368,15 @@ subroutine pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
         stop 'error' 
       endif 
       
+      rc = f77_zmq_recv( zmq_socket_pull, delta_s2(1,0,kk), (n(kk)+1)*8*N_states, ZMQ_SNDMORE) 
+      if (rc /= (n(kk)+1)*8*N_states) then 
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta_s2, (n(kk)+1)*8*N_states, ZMQ_SNDMORE)'
+        stop 'error' 
+      endif 
+      
       rc = f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE) 
       if (rc /= n(kk)*4) then 
-        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, delta, n(kk)*4, ZMQ_SNDMORE)'
+        print *, irp_here,  'f77_zmq_recv( zmq_socket_pull, idx(1,kk), n(kk)*4, ZMQ_SNDMORE)'
         stop 'error' 
       endif 
     end if
@@ -372,7 +400,7 @@ end
 
 
 
-subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
+subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_,delta_ii_s2_,delta_ij_s2_)
   use f77_zmq
   implicit none
   BEGIN_DOC 
@@ -381,11 +409,13 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
  
   double precision,intent(inout)               :: delta_ij_(N_states,N_det_non_ref,N_det_ref) 
   double precision,intent(inout)               :: delta_ii_(N_states,N_det_ref)
+  double precision,intent(inout)               :: delta_ij_s2_(N_states,N_det_non_ref,N_det_ref) 
+  double precision,intent(inout)               :: delta_ii_s2_(N_states,N_det_ref)
 
 !   integer                        :: j,l
   integer                        :: rc 
    
-  double precision, allocatable  :: delta(:,:,:) 
+  double precision, allocatable  :: delta(:,:,:), delta_s2(:,:,:) 
    
   integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket 
   integer(ZMQ_PTR)               :: zmq_to_qp_run_socket 
@@ -401,49 +431,47 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
   
   delta_ii_(:,:) = 0d0
   delta_ij_(:,:,:) = 0d0
+  delta_ii_s2_(:,:) = 0d0
+  delta_ij_s2_(:,:,:) = 0d0
 
   zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() 
   zmq_socket_pull = new_zmq_pull_socket() 
  
-  allocate ( delta(N_states,0:N_det_non_ref,2) ) 
+  allocate ( delta(N_states,0:N_det_non_ref,2), delta_s2(N_states,0:N_det_non_ref,2) ) 
   
   allocate(idx(N_det_non_ref,2))
   more = 1 
   do while (more == 1) 
          
-    call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, task_id)
+    call pull_mrsc2_results(zmq_socket_pull, I_i, J, n, idx, delta, delta_s2, task_id)
     
 
       do l=1, n(1)
         do i_state=1,N_states
           delta_ij_(i_state,idx(l,1),i_I) += delta(i_state,l,1)
+          delta_ij_s2_(i_state,idx(l,1),i_I) += delta_s2(i_state,l,1)
         end do
       end do
       
       do l=1, n(2)
         do i_state=1,N_states
           delta_ij_(i_state,idx(l,2),J) += delta(i_state,l,2)
+          delta_ij_s2_(i_state,idx(l,2),J) += delta_s2(i_state,l,2)
         end do
       end do
 
     
-!     
-!       do l=1,nlink(J)
-!         do i_state=1,N_states
-!           delta_ij_(i_state,det_cepa0_idx(linked(l,J)),i_I) += delta(i_state,l,1)
-!           delta_ij_(i_state,det_cepa0_idx(linked(l,i_I)),j) += delta(i_state,l,2)
-!         end do
-!       end do
-!      
        if(n(1) /= 0) then 
        do i_state=1,N_states
          delta_ii_(i_state,i_I) += delta(i_state,0,1)
+         delta_ii_s2_(i_state,i_I) += delta_s2(i_state,0,1)
        end do
        end if
 
       if(n(2) /= 0) then 
        do i_state=1,N_states
          delta_ii_(i_state,J) += delta(i_state,0,2)
+         delta_ii_s2_(i_state,J) += delta_s2(i_state,0,2)
        end do
        end if
 
@@ -454,7 +482,7 @@ subroutine mrsc2_dressing_collector(delta_ii_,delta_ij_)
 
  
   enddo 
-  deallocate( delta )
+  deallocate( delta, delta_s2 )
  
   call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
   call end_zmq_pull_socket(zmq_socket_pull)
@@ -466,6 +494,8 @@ end
 
  BEGIN_PROVIDER [ double precision, delta_ij_old, (N_states,N_det_non_ref,N_det_ref) ]
 &BEGIN_PROVIDER [ double precision, delta_ii_old, (N_states,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ij_s2_old, (N_states,N_det_non_ref,N_det_ref) ]
+&BEGIN_PROVIDER [ double precision, delta_ii_s2_old, (N_states,N_det_ref) ]
   implicit none
   
   integer                         :: i_state, i, i_I, J, k, kk, degree, degree2, m, l, deg, ni, m2
@@ -574,10 +604,10 @@ end
 !  rc = pthread_create(collector_thread, mrsc2_dressing_collector)
   print *, nzer, ntot, float(nzer) / float(ntot)
   provide nproc
-  !$OMP PARALLEL DEFAULT(none)  SHARED(delta_ii_old,delta_ij_old)  PRIVATE(i) NUM_THREADS(nproc+1)
+  !$OMP PARALLEL DEFAULT(none)  SHARED(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)  PRIVATE(i) NUM_THREADS(nproc+1)
       i = omp_get_thread_num()
       if (i==0) then
-        call mrsc2_dressing_collector(delta_ii_old,delta_ij_old)
+        call mrsc2_dressing_collector(delta_ii_old,delta_ij_old,delta_ii_s2_old,delta_ij_s2_old)
       else
         call mrsc2_dressing_slave_inproc(i)
       endif
diff --git a/plugins/mrcepa0/mrcc.irp.f b/plugins/mrcepa0/mrcc.irp.f
index a28d4be3..a5614942 100644
--- a/plugins/mrcepa0/mrcc.irp.f
+++ b/plugins/mrcepa0/mrcc.irp.f
@@ -16,7 +16,7 @@ program mrsc2sub
         psi_coef(i,j) = CI_eigenvectors(i,j)
       enddo
     enddo
-    TOUCH psi_coef 
+    SOFT_TOUCH psi_coef 
   endif
   call run(N_states,energy)
   if(do_pt2_end)then
diff --git a/plugins/mrcepa0/mrcepa0_general.irp.f b/plugins/mrcepa0/mrcepa0_general.irp.f
index 09c35e52..1b2e2fcb 100644
--- a/plugins/mrcepa0/mrcepa0_general.irp.f
+++ b/plugins/mrcepa0/mrcepa0_general.irp.f
@@ -21,7 +21,7 @@ subroutine run(N_st,energy)
   n_it_mrcc_max = n_it_max_dressed_ci
 
   if(n_it_mrcc_max == 1) then
-    do j=1,N_states_diag
+    do j=1,N_states
       do i=1,N_det
         psi_coef(i,j) = CI_eigenvectors_dressed(i,j)
       enddo
@@ -37,15 +37,21 @@ subroutine run(N_st,energy)
     lambda = 1.d0
     do while (delta_E > thresh_mrcc)
       iteration += 1
-      print *,  '===========================' 
-      print *,  'MRCEPA0 Iteration', iteration
-      print *,  '===========================' 
+      print *,  '===============================================' 
+      print *,  'MRCEPA0 Iteration', iteration, '/', n_it_mrcc_max
+      print *,  '===============================================' 
       print *,  ''
-      E_old = sum(ci_energy_dressed)
-      call write_double(6,ci_energy_dressed(1),"MRCEPA0 energy")
+      E_old = sum(ci_energy_dressed(1:N_states))
+      do i=1,N_st
+        call write_double(6,ci_energy_dressed(i),"MRCEPA0 energy")
+      enddo
       call diagonalize_ci_dressed(lambda)
-      E_new = sum(ci_energy_dressed)
-      delta_E = dabs(E_new - E_old)
+      E_new = sum(ci_energy_dressed(1:N_states))
+      delta_E = (E_new - E_old)/dble(N_states)
+      print *,  ''
+      call write_double(6,thresh_mrcc,"thresh_mrcc")
+      call write_double(6,delta_E,"delta_E")
+      delta_E = dabs(delta_E)
       call save_wavefunction
       call ezfio_set_mrcepa0_energy(ci_energy_dressed(1))
       if (iteration >= n_it_mrcc_max) then
diff --git a/src/Davidson/EZFIO.cfg b/src/Davidson/EZFIO.cfg
index b7c67465..7724400f 100644
--- a/src/Davidson/EZFIO.cfg
+++ b/src/Davidson/EZFIO.cfg
@@ -15,3 +15,16 @@ type: Strictly_positive_int
 doc: Number of micro-iterations before re-contracting
 default: 10
 interface: ezfio,provider,ocaml
+
+[state_following]
+type: logical
+doc: If true, the states are re-ordered to match the input states
+default: False
+interface: ezfio,provider,ocaml
+
+[disk_based_davidson]
+type: logical
+doc: If true, disk space is used to store the vectors
+default: False
+interface: ezfio,provider,ocaml
+
diff --git a/src/Davidson/diagonalization_hs2.irp.f b/src/Davidson/diagonalization_hs2.irp.f
index c70a086c..dccc8ee5 100644
--- a/src/Davidson/diagonalization_hs2.irp.f
+++ b/src/Davidson/diagonalization_hs2.irp.f
@@ -45,8 +45,11 @@ subroutine davidson_diag_hs2(dets_in,u_in,s2_out,dim_in,energies,sze,N_st,N_st_d
   !$OMP END DO 
   !$OMP END PARALLEL
 
-  call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
-!  call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  if (disk_based_davidson) then
+    call davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  else
+    call davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_st,N_st_diag,Nint,iunit)
+  endif
   do i=1,N_st_diag
     s2_out(i) = S2_jj(i)
   enddo
@@ -84,8 +87,8 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint
   integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
   double precision,  intent(in)  :: H_jj(sze)
-  double precision,  intent(inout)  :: S2_jj(sze)
-  integer,  intent(in)  :: iunit
+  double precision,  intent(inout) :: S2_jj(sze)
+  integer,  intent(in)           :: iunit
   double precision, intent(inout) :: u_in(dim_in,N_st_diag)
   double precision, intent(out)  :: energies(N_st_diag)
   
@@ -99,7 +102,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   integer                        :: k_pairs, kl
   
   integer                        :: iter2
-  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:)
+  double precision, allocatable  :: W(:,:),  U(:,:), S(:,:), overlap(:,:)
   double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
   double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
   double precision               :: diag_h_mat_elem
@@ -108,17 +111,19 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   double precision               :: to_print(3,N_st)
   double precision               :: cpu, wall
   integer                        :: shift, shift2, itermax
+  double precision               :: r1, r2
+  logical                        :: state_ok(N_st_diag*davidson_sze_max)
   include 'constants.include.F'
   
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
   if (N_st_diag*3 > sze) then
-     print *,  'error in Davidson :'
-     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
-     stop -1
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+    stop -1
   endif
-
+  
   PROVIDE nuclear_repulsion expected_s2
-
+  
   call write_time(iunit)
   call wall_time(wall)
   call cpu_time(cpu)
@@ -137,7 +142,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = ' Iter'
   do i=1,N_st
-    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual'
+    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual  '
   enddo
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = '===== '
@@ -145,31 +150,32 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
     write_buffer = trim(write_buffer)//' ================ =========== ==========='
   enddo
   write(iunit,'(A)') trim(write_buffer)
-
-  integer, external :: align_double
+  
+  integer, external              :: align_double
   sze_8 = align_double(sze)
-
-  itermax = min(davidson_sze_max, sze/N_st_diag)
+  
+  itermax = max(3,min(davidson_sze_max, sze/N_st_diag))
   allocate(                                                          &
-      W(sze_8,N_st_diag*itermax),                           &
-      U(sze_8,N_st_diag*itermax),                           &
-      S(sze_8,N_st_diag*itermax),                           &
-      h(N_st_diag*itermax,N_st_diag*itermax),      &
-      y(N_st_diag*itermax,N_st_diag*itermax),      &
-      s_(N_st_diag*itermax,N_st_diag*itermax),     &
-      s_tmp(N_st_diag*itermax,N_st_diag*itermax),  &
+      W(sze_8,N_st_diag*itermax),                                    &
+      U(sze_8,N_st_diag*itermax),                                    &
+      S(sze_8,N_st_diag*itermax),                                    &
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
       residual_norm(N_st_diag),                                      &
-      c(N_st_diag*itermax),                                 &
-      s2(N_st_diag*itermax),                                &
+      c(N_st_diag*itermax),                                          &
+      s2(N_st_diag*itermax),                                         &
+      overlap(N_st_diag*itermax, N_st_diag*itermax),                 &
       lambda(N_st_diag*itermax))
   
-  h  = 0.d0
-  s_ = 0.d0
-  s_tmp = 0.d0
+  h = 0.d0
   U = 0.d0
   W = 0.d0
   S = 0.d0
   y = 0.d0
+  s_ = 0.d0
+  s_tmp = 0.d0
 
 
   ASSERT (N_st > 0)
@@ -183,21 +189,21 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
   
   converged = .False.
   
-  double precision               :: r1, r2
   do k=N_st+1,N_st_diag
-      do i=1,sze
-        call random_number(r1)
-        call random_number(r2)
-        r1 = dsqrt(-2.d0*dlog(r1))
-        r2 = dtwo_pi*r2
-        u_in(i,k) = r1*dcos(r2)
-      enddo
+    u_in(k,k) = 10.d0
+    do i=1,sze
+      call random_number(r1)
+      call random_number(r2)
+      r1 = dsqrt(-2.d0*dlog(r1))
+      r2 = dtwo_pi*r2
+      u_in(i,k) = r1*dcos(r2)
+    enddo
   enddo
   do k=1,N_st_diag
     call normalize(u_in(1,k),sze)
   enddo
-
-
+  
+  
   do while (.not.converged)
     
     do k=1,N_st_diag
@@ -205,12 +211,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
         U(i,k) = u_in(i,k)
       enddo
     enddo
-
+    
     do iter=1,itermax-1
       
       shift  = N_st_diag*(iter-1)
       shift2 = N_st_diag*iter
-
+      
       call ortho_qr(U,size(U,1),sze,shift2)
 
       ! Compute |W_k> = \sum_i |i><i|H|u_k>
@@ -233,8 +239,49 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
           0.d0, s_, size(s_,1))
 
 
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
+      
       ! Diagonalize h
       ! -------------
+
       call lapack_diag(lambda,y,h,size(h,1),shift2)
       
       ! Compute S2 for each eigenvector
@@ -255,24 +302,72 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
       enddo
 
       if (s2_eig) then
-        logical :: state_ok(N_st_diag*davidson_sze_max)
-        do k=1,shift2
-          state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        do k=1,size(state_ok)
+          state_ok(k) = .True.
         enddo
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+
+        overlap = -1.d0
         do k=1,shift2
-          if (.not. state_ok(k)) then
-            do l=k+1,shift2
-              if (state_ok(l)) then
-                call dswap(shift2, y(1,k), 1, y(1,l), 1)
-                call dswap(1, s2(k), 1, s2(l), 1)
-                call dswap(1, lambda(k), 1, lambda(l), 1)
-                state_ok(k) = .True.
-                state_ok(l) = .False.
-                exit
-              endif
-            enddo
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
+        enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,N_st
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,N_st_diag
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
           endif
         enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
       endif
 
 
@@ -291,10 +386,12 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
       
       do k=1,N_st_diag
         do i=1,sze
-          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+          U(i,shift2+k) =  &
+            (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
               * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
-             )/max(H_jj(i) - lambda (k),1.d-2)
+            )/max(H_jj(i) - lambda (k),1.d-2)
         enddo
+
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
           to_print(1,k) = lambda(k) + nuclear_repulsion
@@ -339,7 +436,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sze,N_s
 
   deallocate (                                                       &
       W, residual_norm,                                              &
-      U,                                                             &
+      U, overlap,                                                    &
       c, S,                                                          &
       h,                                                             &
       y, s_, s_tmp,                                                  &
@@ -378,8 +475,8 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   integer, intent(in)            :: dim_in, sze, N_st, N_st_diag, Nint
   integer(bit_kind), intent(in)  :: dets_in(Nint,2,sze)
   double precision,  intent(in)  :: H_jj(sze)
-  double precision,  intent(inout)  :: S2_jj(sze)
-  integer,  intent(in)  :: iunit
+  double precision,  intent(inout) :: S2_jj(sze)
+  integer,  intent(in)           :: iunit
   double precision, intent(inout) :: u_in(dim_in,N_st_diag)
   double precision, intent(out)  :: energies(N_st_diag)
   
@@ -393,7 +490,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   integer                        :: k_pairs, kl
   
   integer                        :: iter2
-  double precision, pointer      :: W(:,:),  U(:,:), S(:,:)
+  double precision, pointer      :: W(:,:),  U(:,:), S(:,:), overlap(:,:)
   double precision, allocatable  :: y(:,:), h(:,:), lambda(:), s2(:)
   double precision, allocatable  :: c(:), s_(:,:), s_tmp(:,:)
   double precision               :: diag_h_mat_elem
@@ -401,18 +498,19 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   character*(16384)              :: write_buffer
   double precision               :: to_print(3,N_st)
   double precision               :: cpu, wall
+  logical                        :: state_ok(N_st_diag*davidson_sze_max)
   integer                        :: shift, shift2, itermax
   include 'constants.include.F'
   
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: U, W, S, y, h, lambda
   if (N_st_diag*3 > sze) then
-     print *,  'error in Davidson :'
-     print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
-     stop -1
+    print *,  'error in Davidson :'
+    print *,  'Increase n_det_max_jacobi to ', N_st_diag*3
+    stop -1
   endif
-
+  
   PROVIDE nuclear_repulsion expected_s2
-
+  
   call write_time(iunit)
   call wall_time(wall)
   call cpu_time(cpu)
@@ -431,7 +529,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = ' Iter'
   do i=1,N_st
-    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual'
+    write_buffer = trim(write_buffer)//'      Energy          S^2      Residual  '
   enddo
   write(iunit,'(A)') trim(write_buffer)
   write_buffer = '===== '
@@ -439,51 +537,52 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
     write_buffer = trim(write_buffer)//' ================ =========== ==========='
   enddo
   write(iunit,'(A)') trim(write_buffer)
-
-  integer, external :: align_double
-  integer :: fd(3)
-  type(c_ptr) :: c_pointer(3)
+  
+  integer, external              :: align_double
+  integer                        :: fd(3)
+  type(c_ptr)                    :: c_pointer(3)
   sze_8 = align_double(sze)
-
+  
   itermax = min(davidson_sze_max, sze/N_st_diag)
   
-  call mmap(                                                        &
-      trim(ezfio_work_dir)//'U',                                    &
-      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'U',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
       8, fd(1), .False., c_pointer(1))
   call c_f_pointer(c_pointer(1), W, (/ sze_8,N_st_diag*itermax /) )
   
-  call mmap(                                                        &
-      trim(ezfio_work_dir)//'W',                                    &
-      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'W',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
       8, fd(2), .False., c_pointer(2))
   call c_f_pointer(c_pointer(2), U, (/ sze_8,N_st_diag*itermax /) )
   
-  call mmap(                                                        &
-      trim(ezfio_work_dir)//'S',                                    &
-      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                                &
+  call mmap(                                                         &
+      trim(ezfio_work_dir)//'S',                                     &
+      (/ int(sze_8,8),int(N_st_diag*itermax,8) /),                   &
       8, fd(3), .False., c_pointer(3))
   call c_f_pointer(c_pointer(3), S, (/ sze_8,N_st_diag*itermax /) )
   
   allocate(                                                          &
-      h(N_st_diag*itermax,N_st_diag*itermax),      &
-      y(N_st_diag*itermax,N_st_diag*itermax),      &
-      s_(N_st_diag*itermax,N_st_diag*itermax),     &
-      s_tmp(N_st_diag*itermax,N_st_diag*itermax),  &
+      h(N_st_diag*itermax,N_st_diag*itermax),                        &
+      y(N_st_diag*itermax,N_st_diag*itermax),                        &
+      s_(N_st_diag*itermax,N_st_diag*itermax),                       &
+      s_tmp(N_st_diag*itermax,N_st_diag*itermax),                    &
+      overlap(N_st_diag*itermax, N_st_diag*itermax),                 &
       residual_norm(N_st_diag),                                      &
-      c(N_st_diag*itermax),                                 &
-      s2(N_st_diag*itermax),                                &
+      c(N_st_diag*itermax),                                          &
+      s2(N_st_diag*itermax),                                         &
       lambda(N_st_diag*itermax))
   
-  h  = 0.d0
-  s_ = 0.d0
-  s_tmp = 0.d0
+  h = 0.d0
   U = 0.d0
   W = 0.d0
   S = 0.d0
   y = 0.d0
-
-
+  s_ = 0.d0
+  s_tmp = 0.d0
+  
+  
   ASSERT (N_st > 0)
   ASSERT (N_st_diag >= N_st)
   ASSERT (sze > 0)
@@ -497,6 +596,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
   
   double precision               :: r1, r2
   do k=N_st+1,N_st_diag
+      u_in(k,k) = 10.d0
       do i=1,sze
         call random_number(r1)
         r1 = dsqrt(-2.d0*dlog(r1))
@@ -546,6 +646,45 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
             0.d0, s_(shift+1,1), size(s_,1))
       enddo
 
+!      ! Diagonalize S^2
+!      ! ---------------
+!
+!      call lapack_diag(s2,y,s_,size(s_,1),shift2)
+!
+!
+!      ! Rotate H in the basis of eigenfunctions of s2
+!      ! ---------------------------------------------
+!
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('T','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+!
+!      ! Damp interaction between different spin states
+!      ! ------------------------------------------------
+!
+!      do k=1,shift2
+!        do l=1,shift2
+!          if (dabs(s2(k) - s2(l)) > 1.d0) then
+!            h(k,l) = h(k,l)*(max(0.d0,1.d0 - dabs(s2(k) - s2(l))))
+!          endif
+!        enddo
+!      enddo
+!
+!      ! Rotate back H 
+!      ! -------------
+!
+!      call dgemm('N','T',shift2,shift2,shift2,                       &
+!          1.d0, h, size(h,1), y, size(y,1),                          &
+!          0.d0, s_tmp, size(s_tmp,1))
+!      
+!      call dgemm('N','N',shift2,shift2,shift2,                       &
+!          1.d0, y, size(y,1), s_tmp, size(s_tmp,1),                  &
+!          0.d0, h, size(h,1))
+
 
       ! Diagonalize h
       ! -------------
@@ -568,25 +707,74 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
         s2(k) = s_(k,k) + S_z2_Sz
       enddo
 
+
       if (s2_eig) then
-        logical :: state_ok(N_st_diag*davidson_sze_max)
+          do k=1,shift2
+            state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          enddo
+      else
+        state_ok(:) = .True.   ! no S^2 filter: every state is acceptable
+      endif
+
+      do k=1,shift2
+        if (.not. state_ok(k)) then
+          do l=k+1,shift2
+            if (state_ok(l)) then
+              call dswap(shift2, y(1,k), 1, y(1,l), 1)
+              call dswap(1, s2(k), 1, s2(l), 1)
+              call dswap(1, lambda(k), 1, lambda(l), 1)
+              state_ok(k) = .True.
+              state_ok(l) = .False.
+              exit
+            endif
+          enddo
+        endif
+      enddo
+
+      if (state_following) then
+
+        ! Compute overlap with U_in
+        ! -------------------------
+        
+        integer                        :: order(N_st_diag)
+        double precision               :: cmax
+        overlap = -1.d0
         do k=1,shift2
-          state_ok(k) = (dabs(s2(k)-expected_s2) < 0.6d0)
+          do i=1,shift2
+            overlap(k,i) = dabs(y(k,i))
+          enddo
         enddo
-        do k=1,shift2
-          if (.not. state_ok(k)) then
-            do l=k+1,shift2
-              if (state_ok(l)) then
-                call dswap(shift2, y(1,k), 1, y(1,l), 1)
-                call dswap(1, s2(k), 1, s2(l), 1)
-                call dswap(1, lambda(k), 1, lambda(l), 1)
-                state_ok(k) = .True.
-                state_ok(l) = .False.
-                exit
-              endif
-            enddo
+        do k=1,N_st
+          cmax = -1.d0
+          do i=1,shift2
+            if (overlap(i,k) > cmax) then
+              cmax = overlap(i,k) 
+              order(k) = i
+            endif
+          enddo
+          do i=1,shift2
+            overlap(order(k),i) = -1.d0
+          enddo
+        enddo
+        overlap = y
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            y(1:shift2,k) = overlap(1:shift2,l)
           endif
         enddo
+        do k=1,N_st
+          overlap(k,1) = lambda(k)
+          overlap(k,2) = s2(k)
+        enddo
+        do k=1,N_st
+          l = order(k)
+          if (k /= l) then
+            lambda(k) = overlap(l,1)
+            s2(k) = overlap(l,2)
+          endif
+        enddo
+        
       endif
 
 
@@ -604,11 +792,31 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
       ! -----------------------------------------
       
       do k=1,N_st_diag
-        do i=1,sze
-          U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
-              * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
-             )/max(H_jj(i) - lambda (k),1.d-2)
-        enddo
+        if (state_ok(k)) then
+          do i=1,sze
+            U(i,shift2+k) = (lambda(k) * U(i,shift2+k) - W(i,shift2+k) )      &
+                * (1.d0 + s2(k) * U(i,shift2+k) - S(i,shift2+k) - S_z2_Sz &
+              )/max(H_jj(i) - lambda (k),1.d-2)
+          enddo
+        else
+          ! Randomize components with bad <S2>
+            do i=1,sze-2,2
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+              U(i+1,shift2+k) = r1*dsin(r2)
+            enddo
+            do i=sze-2+1,sze
+              call random_number(r1)
+              call random_number(r2)
+              r1 = dsqrt(-2.d0*dlog(r1))
+              r2 = dtwo_pi*r2
+              U(i,shift2+k) = r1*dcos(r2)
+            enddo
+        endif
+
         if (k <= N_st) then
           residual_norm(k) = u_dot_u(U(1,shift2+k),sze)
           to_print(1,k) = lambda(k) + nuclear_repulsion
@@ -665,7 +873,7 @@ subroutine davidson_diag_hjj_sjj_mmap(dets_in,u_in,H_jj,S2_jj,energies,dim_in,sz
 
   deallocate (                                                       &
       residual_norm,                                                 &
-      c,                                                             &
+      c, overlap,                                                    &
       h,                                                             &
       y, s_, s_tmp,                                                  &
       lambda                                                         &
diff --git a/src/Davidson/diagonalize_CI.irp.f b/src/Davidson/diagonalize_CI.irp.f
index 3b2c9ed0..e1b67438 100644
--- a/src/Davidson/diagonalize_CI.irp.f
+++ b/src/Davidson/diagonalize_CI.irp.f
@@ -40,6 +40,7 @@ END_PROVIDER
    double precision, allocatable  :: e_array(:)
    integer, allocatable           :: iorder(:)
    
+   PROVIDE threshold_davidson
    ! Guess values for the "N_states" states of the CI_eigenvectors
    do j=1,min(N_states,N_det)
      do i=1,N_det
diff --git a/src/Davidson/u0Hu0.irp.f b/src/Davidson/u0Hu0.irp.f
index dd5ab1ab..117e704e 100644
--- a/src/Davidson/u0Hu0.irp.f
+++ b/src/Davidson/u0Hu0.irp.f
@@ -88,9 +88,12 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
   
   !$OMP DO SCHEDULE(dynamic)
   do sh=1,shortcut(0,1)
-    do sh2=sh,shortcut(0,1)
-      exa = 0
-      do ni=1,Nint
+    do sh2=1,shortcut(0,1)
+      exa = popcnt(xor(version(1,sh,1), version(1,sh2,1)))
+      if(exa > 2) then
+        cycle
+      end if
+      do ni=2,Nint
         exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
       end do
       if(exa > 2) then
@@ -99,29 +102,27 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
       
       do i=shortcut(sh,1),shortcut(sh+1,1)-1
         org_i = sort_idx(i,1)
-        if(sh==sh2) then
-          endi = i-1
-        else
-          endi = shortcut(sh2+1,1)-1
-        end if
         do ni=1,Nint
           sorted_i(ni) = sorted(ni,i,1)
         enddo
         
-        do j=shortcut(sh2,1),endi
+ jloop: do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
           org_j = sort_idx(j,1)
-          ext = exa
-          do ni=1,Nint
-            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
-          end do
-          if(ext <= 4) then
-            call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
-            do istate=1,N_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
-              vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
-            enddo
+          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))  ! block-level count exa plus word 1
+          if((ext > 4) .or. (j == i)) then  ! j == i: diagonal term, never visited by the replaced loop
+            cycle jloop
           endif
-        enddo
+          do ni=2,Nint
+            ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+            if(ext > 4) then
+              cycle jloop
+            endif
+          end do
+          call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
+          do istate=1,N_st
+            vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
+          enddo
+        enddo jloop
       enddo
     enddo
   enddo
@@ -131,19 +132,19 @@ subroutine H_u_0_nstates(v_0,u_0,H_jj,n,keys_tmp,Nint,N_st,sze_8)
   do sh=1,shortcut(0,2)
     do i=shortcut(sh,2),shortcut(sh+1,2)-1
       org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),i-1
+      do j=shortcut(sh,2),shortcut(sh+1,2)-1
         org_j = sort_idx(j,2)
-        ext = 0
-        do ni=1,Nint
+        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
+        do ni=2,Nint
           ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
         end do
-        if(ext == 4) then
-          call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
-          do istate=1,N_st
-            vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
-            vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
-          enddo
-        end if
+        if(ext /= 4) then
+          cycle
+        endif
+        call i_H_j(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),Nint,hij)
+        do istate=1,N_st
+          vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
+        enddo
       end do
     end do
   enddo
@@ -313,7 +314,7 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
   integer :: blockb, blockb2, istep
   double precision :: ave_workload, workload, target_workload_inv
   
-  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut
+  !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: vt, ut, st
 
   N_st_8 = align_double(N_st)
 
@@ -328,49 +329,62 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
   v_0 = 0.d0
   s_0 = 0.d0
 
-  do i=1,n
-    do istate=1,N_st
-      ut(istate,i) = u_0(i,istate)
-    enddo
-  enddo
-
   call sort_dets_ab_v(keys_tmp, sorted(1,1,1), sort_idx(1,1), shortcut(0,1), version(1,1,1), n, Nint)
   call sort_dets_ba_v(keys_tmp, sorted(1,1,2), sort_idx(1,2), shortcut(0,2), version(1,1,2), n, Nint)
 
   !$OMP PARALLEL DEFAULT(NONE)                                       &
       !$OMP PRIVATE(i,hij,s2,j,k,jj,vt,st,ii,sh,sh2,ni,exa,ext,org_i,org_j,endi,sorted_i,istate)&
-      !$OMP SHARED(n,keys_tmp,ut,Nint,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
+      !$OMP SHARED(n,keys_tmp,ut,Nint,u_0,v_0,s_0,sorted,shortcut,sort_idx,version,N_st,N_st_8) 
   allocate(vt(N_st_8,n),st(N_st_8,n))
   Vt = 0.d0
   St = 0.d0
 
-  !$OMP DO SCHEDULE(static,1)
+  !$OMP DO
+  do i=1,n
+    do istate=1,N_st
+      ut(istate,i) = u_0(sort_idx(i,2),istate)
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP DO SCHEDULE(dynamic) 
   do sh=1,shortcut(0,2)
     do i=shortcut(sh,2),shortcut(sh+1,2)-1
       org_i = sort_idx(i,2)
-      do j=shortcut(sh,2),i-1
+      do j=shortcut(sh,2),shortcut(sh+1,2)-1
         org_j = sort_idx(j,2)
-        ext = 0
-        do ni=1,Nint
+        ext = popcnt(xor(sorted(1,i,2), sorted(1,j,2)))
+        if (ext > 4) cycle
+        do ni=2,Nint
           ext = ext + popcnt(xor(sorted(ni,i,2), sorted(ni,j,2)))
+          if (ext > 4) exit
         end do
         if(ext == 4) then
             call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
             call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
             do istate=1,n_st
-              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
-              vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
-              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
-              st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
+              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
+              st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
             enddo
         end if
       end do
     end do
   enddo
-  !$OMP END DO NOWAIT
+  !$OMP END DO
+
+  !$OMP DO
+  do i=1,n
+    do istate=1,N_st
+      ut(istate,i) = u_0(sort_idx(i,1),istate)
+    enddo
+  enddo
+  !$OMP END DO
+
+  !$OMP DO SCHEDULE(dynamic)
   do sh=1,shortcut(0,1)
-    !$OMP DO SCHEDULE(static,1)
-    do sh2=sh,shortcut(0,1)
+    do sh2=1,shortcut(0,1)
+      if (sh==sh2) cycle
+
       exa = 0
       do ni=1,Nint
         exa = exa + popcnt(xor(version(ni,sh,1), version(ni,sh2,1)))
@@ -381,44 +395,102 @@ subroutine H_S2_u_0_nstates(v_0,s_0,u_0,H_jj,S2_jj,n,keys_tmp,Nint,N_st,sze_8)
 
       do i=shortcut(sh,1),shortcut(sh+1,1)-1
         org_i = sort_idx(i,1)
-        if(sh==sh2) then
-          endi = i-1
-        else
-          endi = shortcut(sh2+1,1)-1
-        end if
         do ni=1,Nint
           sorted_i(ni) = sorted(ni,i,1)
         enddo
 
-        do j=shortcut(sh2,1),endi
-          ext = exa
-          do ni=1,Nint
+        do j=shortcut(sh2,1),shortcut(sh2+1,1)-1
+          ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
+          if (ext > 4) cycle
+          do ni=2,Nint
             ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+            if (ext > 4) exit
           end do
           if(ext <= 4) then
             org_j = sort_idx(j,1)
             call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
             if (hij /= 0.d0) then
               do istate=1,n_st
-                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,org_j)
-                vt (istate,org_j) = vt (istate,org_j) + hij*ut(istate,org_i)
+                vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)
               enddo
             endif
             if (ext /= 2) then
               call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
               if (s2 /= 0.d0) then
                 do istate=1,n_st
-                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,org_j)
-                  st (istate,org_j) = st (istate,org_j) + s2*ut(istate,org_i)
+                  st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
                 enddo
               endif
             endif
           endif
         enddo
+        
+      enddo
+    enddo
+
+    exa = 0  ! same block: this is the sh2 == sh case skipped by the loop above
+
+    do i=shortcut(sh,1),shortcut(sh+1,1)-1  ! i: sorted positions of block sh
+      org_i = sort_idx(i,1)
+      do ni=1,Nint
+        sorted_i(ni) = sorted(ni,i,1)
+      enddo
+
+      do j=shortcut(sh,1),i-1  ! first half: partners j < i of the same block
+        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
+        if (ext > 4) cycle  ! more than 4 differing bits in word 1 already
+        do ni=2,Nint
+          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+          if (ext > 4) exit
+        end do
+        if(ext <= 4) then
+          org_j = sort_idx(j,1)
+          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
+          if (hij /= 0.d0) then
+            do istate=1,n_st
+              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)  ! ut is in sorted order (filled via sort_idx(:,1))
+            enddo
+          endif
+          if (ext /= 2) then
+            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
+            if (s2 /= 0.d0) then
+              do istate=1,n_st
+                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
+              enddo
+            endif
+          endif
+        endif
+      enddo
+      
+      do j=i+1,shortcut(sh+1,1)-1  ! second half: partners j > i, so j == i is never visited
+        if (i==j) cycle  ! always false here: j starts at i+1
+        ext = exa + popcnt(xor(sorted_i(1), sorted(1,j,1)))
+        if (ext > 4) cycle
+        do ni=2,Nint
+          ext = ext + popcnt(xor(sorted_i(ni), sorted(ni,j,1)))
+          if (ext > 4) exit
+        end do
+        if(ext <= 4) then
+          org_j = sort_idx(j,1)
+          call i_h_j (keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,hij)
+          if (hij /= 0.d0) then
+            do istate=1,n_st
+              vt (istate,org_i) = vt (istate,org_i) + hij*ut(istate,j)  ! only row org_i is updated, unlike the removed symmetric update
+            enddo
+          endif
+          if (ext /= 2) then
+            call get_s2(keys_tmp(1,1,org_j),keys_tmp(1,1,org_i),nint,s2)
+            if (s2 /= 0.d0) then
+              do istate=1,n_st
+                st (istate,org_i) = st (istate,org_i) + s2*ut(istate,j)
+              enddo
+            endif
+          endif
+        endif
       enddo
     enddo
-    !$OMP END DO NOWAIT
   enddo
+  !$OMP END DO 
 
   !$OMP CRITICAL (u0Hu0)
   do istate=1,N_st
diff --git a/src/Determinants/determinants.irp.f b/src/Determinants/determinants.irp.f
index 39b0f58e..bed3327d 100644
--- a/src/Determinants/determinants.irp.f
+++ b/src/Determinants/determinants.irp.f
@@ -438,8 +438,12 @@ end
  do i=1,N_states
    psi_coef_min(i) = minval(psi_coef(:,i))
    psi_coef_max(i) = maxval(psi_coef(:,i))
-   abs_psi_coef_min(i) = dabs(psi_coef_min(i))
-   abs_psi_coef_max(i) = dabs(psi_coef_max(i))
+   abs_psi_coef_min(i) = minval( dabs(psi_coef(:,i)) )
+   abs_psi_coef_max(i) = maxval( dabs(psi_coef(:,i)) )
+   call write_double(6,psi_coef_max(i), 'Max coef')
+   call write_double(6,psi_coef_min(i), 'Min coef')
+   call write_double(6,abs_psi_coef_max(i), 'Max abs coef')
+   call write_double(6,abs_psi_coef_min(i), 'Min abs coef')
  enddo
 
 END_PROVIDER
@@ -760,37 +764,85 @@ subroutine apply_excitation(det, exc, res, ok, Nint)
   ok = .false.
   degree = exc(0,1,1) + exc(0,1,2)
   
-  if(.not. (degree > 0 .and. degree <= 2)) then
-    print *, degree
-    print *, "apply ex"
-    STOP
-  endif
-  
-  call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+!  call decode_exc(exc,degree,h1,p1,h2,p2,s1,s2)
+! INLINE
+  select case(degree)
+    case(2)
+      if (exc(0,1,1) == 2) then
+        h1 = exc(1,1,1)
+        h2 = exc(2,1,1)
+        p1 = exc(1,2,1)
+        p2 = exc(2,2,1)
+        s1 = 1
+        s2 = 1
+      else if (exc(0,1,2) == 2) then
+        h1 = exc(1,1,2)
+        h2 = exc(2,1,2)
+        p1 = exc(1,2,2)
+        p2 = exc(2,2,2)
+        s1 = 2
+        s2 = 2
+      else
+        h1 = exc(1,1,1)
+        h2 = exc(1,1,2)
+        p1 = exc(1,2,1)
+        p2 = exc(1,2,2)
+        s1 = 1
+        s2 = 2
+      endif
+    case(1)
+      if (exc(0,1,1) == 1) then
+        h1 = exc(1,1,1)
+        h2 = 0
+        p1 = exc(1,2,1)
+        p2 = 0
+        s1 = 1
+        s2 = 0
+      else
+        h1 = exc(1,1,2)
+        h2 = 0
+        p1 = exc(1,2,2)
+        p2 = 0
+        s1 = 2
+        s2 = 0
+      endif
+    case(0)
+      ! Degree 0: nothing to move, the result is the input determinant.
+      ! Return here: falling through with h1 = 0 and s1 = 0 would read
+      ! det(ii, 0), while every other case uses spin index 1 or 2.
+      res = det
+      ok = .true.
+      return
+    case default
+      print *, degree
+      print *, "apply ex"
+      STOP
+  end select
+! END INLINE
+
   res = det 
   
-  ii = (h1-1)/bit_kind_size + 1 
-  pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1) ! mod 64
-  if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return
+  ii = ishft(h1-1,-bit_kind_shift) + 1 
+  pos = h1-1-ishft(ii-1,bit_kind_shift)
+  if(iand(det(ii, s1), ibset(0_bit_kind, pos)) == 0_8) return
   res(ii, s1) = ibclr(res(ii, s1), pos)
   
-  ii = (p1-1)/bit_kind_size + 1 
-  pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1)
+  ii = ishft(p1-1,-bit_kind_shift) + 1 
+  pos = p1-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return
   res(ii, s1) = ibset(res(ii, s1), pos)
   
   if(degree == 2) then
-    ii = (h2-1)/bit_kind_size + 1 
-    pos = mod(h2-1, 64)!iand(h2-1,bit_kind_size-1)
+    ii = ishft(h2-1,-bit_kind_shift) + 1 
+    pos = h2-1-ishft(ii-1,bit_kind_shift)
     if(iand(det(ii, s2), ishft(1_bit_kind, pos)) == 0_8) return
     res(ii, s2) = ibclr(res(ii, s2), pos)
     
-    ii = (p2-1)/bit_kind_size + 1 
-    pos = mod(p2-1, 64)!iand(p2-1,bit_kind_size-1)
+    ii = ishft(p2-1,-bit_kind_shift) + 1 
+    pos = p2-1-ishft(ii-1,bit_kind_shift)
     if(iand(det(ii, s2), ishft(1_bit_kind, pos)) /= 0_8) return
     res(ii, s2) = ibset(res(ii, s2), pos)
   endif
-
   ok = .true.
 end subroutine
 
@@ -809,14 +861,14 @@ subroutine apply_particles(det, s1, p1, s2, p2, res, ok, Nint)
   res = det 
   
   if(p1 /= 0) then
-  ii = (p1-1)/bit_kind_size + 1 
-  pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1)
+  ii = ishft(p1-1,-bit_kind_shift) + 1 
+  pos = p1-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return
   res(ii, s1) = ibset(res(ii, s1), pos)
   end if
 
-  ii = (p2-1)/bit_kind_size + 1 
-  pos = mod(p2-1, 64)!iand(p2-1,bit_kind_size-1)
+  ii = ishft(p2-1,-bit_kind_shift) + 1 
+  pos = p2-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s2), ishft(1_bit_kind, pos)) /= 0_8) return
   res(ii, s2) = ibset(res(ii, s2), pos)
 
@@ -838,14 +890,14 @@ subroutine apply_holes(det, s1, h1, s2, h2, res, ok, Nint)
   res = det 
   
   if(h1 /= 0) then
-  ii = (h1-1)/bit_kind_size + 1 
-  pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1)
+  ii = ishft(h1-1,-bit_kind_shift) + 1 
+  pos = h1-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return
   res(ii, s1) = ibclr(res(ii, s1), pos)
   end if
 
-  ii = (h2-1)/bit_kind_size + 1 
-  pos = mod(h2-1, 64)!iand(h2-1,bit_kind_size-1)
+  ii = ishft(h2-1,-bit_kind_shift) + 1 
+  pos = h2-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s2), ishft(1_bit_kind, pos)) == 0_8) return
   res(ii, s2) = ibclr(res(ii, s2), pos)
 
@@ -865,8 +917,8 @@ subroutine apply_particle(det, s1, p1, res, ok, Nint)
   ok = .false.
   res = det 
   
-  ii = (p1-1)/bit_kind_size + 1 
-  pos = mod(p1-1, 64)!iand(p1-1,bit_kind_size-1)
+  ii = ishft(p1-1,-bit_kind_shift) + 1 
+  pos = p1-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s1), ishft(1_bit_kind, pos)) /= 0_8) return
   res(ii, s1) = ibset(res(ii, s1), pos)
 
@@ -887,8 +939,8 @@ subroutine apply_hole(det, s1, h1, res, ok, Nint)
   ok = .false.
   res = det 
   
-  ii = (h1-1)/bit_kind_size + 1 
-  pos = mod(h1-1, 64)!iand(h1-1,bit_kind_size-1)
+  ii = ishft(h1-1,-bit_kind_shift) + 1 
+  pos = h1-1-ishft(ii-1,bit_kind_shift)
   if(iand(det(ii, s1), ishft(1_bit_kind, pos)) == 0_8) return
   res(ii, s1) = ibclr(res(ii, s1), pos)
 
diff --git a/src/MRPT_Utils/MRPT_Utils_main.irp.f b/src/MRPT_Utils/MRPT_Utils_main.irp.f
deleted file mode 100644
index fb17f054..00000000
--- a/src/MRPT_Utils/MRPT_Utils_main.irp.f
+++ /dev/null
@@ -1,3 +0,0 @@
-    program MRPT_Utils_main
-      print *, "I'm a core module, I need an main! (maybe a stupid rule)"
-    end program MRPT_Utils_main
diff --git a/src/Utils/map_module.f90 b/src/Utils/map_module.f90
index 4a83582f..80260233 100644
--- a/src/Utils/map_module.f90
+++ b/src/Utils/map_module.f90
@@ -622,7 +622,7 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in)
     
     istep = ishft(iend-ibegin,-1)
     idx = ibegin + istep
-    do while (istep > 16)
+    do while (istep > 64)
       idx = ibegin + istep
       ! TODO : Cache misses 
       if (cache_key < X(idx)) then
@@ -660,8 +660,8 @@ subroutine search_key_big_interval(key,X,sze,idx,ibegin_in,iend_in)
       endif
     enddo
     idx = ibegin
-    if (min(iend_in,sze) > ibegin+16) then
-      iend = ibegin+16
+    if (min(iend_in,sze) > ibegin+64) then
+      iend = ibegin+64
       do while (cache_key > X(idx))
         idx = idx+1
       end do
@@ -730,7 +730,7 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
     
     istep = ishft(iend-ibegin,-1)
     idx = ibegin + istep
-    do while (istep > 16)
+    do while (istep > 64)
       idx = ibegin + istep
       if (cache_key < X(idx)) then
         iend = idx
@@ -771,8 +771,8 @@ subroutine search_key_value_big_interval(key,value,X,Y,sze,idx,ibegin_in,iend_in
     enddo
     idx = ibegin
     value = Y(idx)
-    if (min(iend_in,sze) > ibegin+16) then
-      iend = ibegin+16
+    if (min(iend_in,sze) > ibegin+64) then
+      iend = ibegin+64
       do while (cache_key > X(idx))
         idx = idx+1
         value = Y(idx)
diff --git a/tests/bats/cassd.bats b/tests/bats/cassd.bats
index a1f1a736..2a8fabc2 100644
--- a/tests/bats/cassd.bats
+++ b/tests/bats/cassd.bats
@@ -13,7 +13,7 @@ source $QP_ROOT/tests/bats/common.bats.sh
   qp_set_mo_class $INPUT -core "[1]" -inact "[2,5]" -act "[3,4,6,7]" -virt "[8-24]" 
   qp_run cassd_zmq $INPUT  
   energy="$(ezfio get cas_sd_zmq energy_pt2)"
-  eq $energy -76.2311177912495   2.E-5
+  eq $energy -76.231084536315 5.E-5
 
   ezfio set determinants n_det_max 2048 
   ezfio set determinants read_wf True
@@ -21,6 +21,6 @@ source $QP_ROOT/tests/bats/common.bats.sh
   qp_run cassd_zmq $INPUT  
   ezfio set determinants read_wf False
   energy="$(ezfio get cas_sd_zmq energy)"
-  eq $energy -76.2300888408526   2.E-5
+  eq $energy -76.2300887947446   2.E-5
 }
 
diff --git a/tests/bats/mrcepa0.bats b/tests/bats/mrcepa0.bats
index ed69681f..dc9e0bb4 100644
--- a/tests/bats/mrcepa0.bats
+++ b/tests/bats/mrcepa0.bats
@@ -16,7 +16,7 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
   energy="$(ezfio get mrcepa0 energy_pt2)"
-  eq $energy -76.238562120457431 1.e-4
+  eq $energy -76.23752746236  1.e-4
 }
 
 @test "MRCC H2O cc-pVDZ" {
@@ -28,12 +28,11 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set determinants threshold_generators 1.
   ezfio set determinants threshold_selectors  1.
   ezfio set determinants read_wf True
-  ezfio set determinants read_wf True
   ezfio set mrcepa0 lambda_type 0
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
   energy="$(ezfio get mrcepa0 energy_pt2)"
-  eq $energy -76.238527498388962 1.e-4
+  eq $energy -76.237469267705 2.e-4
 }
 
 @test "MRSC2 H2O cc-pVDZ" {
@@ -45,11 +44,11 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set determinants threshold_generators 1.
   ezfio set determinants threshold_selectors  1.
   ezfio set determinants read_wf True
-  ezfio set mrcepa0 lambda_type 0
+  ezfio set mrcepa0 lambda_type 1
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
   energy="$(ezfio get mrcepa0 energy_pt2)"
-  eq $energy -76.235833732594187 1.e-4
+  eq $energy -76.2347764009137 2.e-4
 }
 
 @test "MRCEPA0 H2O cc-pVDZ" {
@@ -61,10 +60,10 @@ source $QP_ROOT/tests/bats/common.bats.sh
   ezfio set determinants threshold_generators 1.
   ezfio set determinants threshold_selectors  1.
   ezfio set determinants read_wf True
-  ezfio set mrcepa0 lambda_type 0
+  ezfio set mrcepa0 lambda_type 1
   ezfio set mrcepa0 n_it_max_dressed_ci 3
   qp_run $EXE $INPUT  
   energy="$(ezfio get mrcepa0 energy_pt2)"
-  eq $energy -76.2418799284763 1.e-4
+  eq $energy -76.2406942855164 2.e-4
 }
 
diff --git a/tests/bats/pseudo.bats b/tests/bats/pseudo.bats
index a20b0842..4b374d76 100644
--- a/tests/bats/pseudo.bats
+++ b/tests/bats/pseudo.bats
@@ -48,6 +48,6 @@ function run_FCI_ZMQ() {
 
 @test "FCI H2O VDZ pseudo" {
   qp_set_mo_class h2o_pseudo.ezfio -core "[1]" -act "[2-12]" -del "[13-23]"
-  run_FCI_ZMQ h2o_pseudo.ezfio 2000    -0.170399597228904E+02 -0.170400168816800E+02
+  run_FCI_ZMQ h2o_pseudo.ezfio 2000    -17.0399584106077 -17.0400170044515
 }
 
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index c56f5602..3ac452ad 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -11,7 +11,7 @@ mrcepa0.bats
 #foboci.bats
 
 
-export QP_PREFIX="timeout -s 9 300"
+export QP_PREFIX="timeout -s 9 600"
 #export QP_TASK_DEBUG=1
 
 rm -rf work output