From 3194178dd24a12d92a2313c5f28affeeaa216b9a Mon Sep 17 00:00:00 2001
From: Anthony Scemama <scemama@irsamc.ups-tlse.fr>
Date: Tue, 18 Dec 2018 16:48:33 +0100
Subject: [PATCH] Removed uppercases in filenames

---
 scripts/generate_h_apply.py                   |    8 +-
 scripts/perturbation.py                       |    2 +-
 src/ao_basis/dimensions_integrals.irp.f       |    4 +-
 src/ao_one_e_integrals/pot_ao_ints.irp.f      |   14 +-
 .../pot_ao_pseudo_ints.irp.f                  |    4 +-
 src/bitmask/bitmasks.irp.f                    |    2 +-
 src/cis/{CIS.irp.f => cis.irp.f}              |    0
 src/cis/{H_apply.irp.f => h_apply.irp.f}      |    0
 src/cisd/{CISD.irp.f => cisd.irp.f}           |    0
 src/cisd/{H_apply.irp.f => h_apply.irp.f}     |    0
 ...gonalize_CI.irp.f => diagonalize_ci.irp.f} |    0
 src/davidson/{u0Hu0.irp.f => u0_h_u0.irp.f}   |    0
 ...gonalize_CI.irp.f => diagonalize_ci.irp.f} |    0
 .../{Fock_diag.irp.f => fock_diag.irp.f}      |    0
 .../{H_apply.irp.f => h_apply.irp.f}          |    0
 ...{H_apply.template.f => h_apply.template.f} |    0
 ...mq.template.f => h_apply_nozmq.template.f} |    0
 ..._zmq.template.f => h_apply_zmq.template.f} |    0
 src/determinants/s2.irp.f                     |    2 +-
 src/determinants/slater_rules.irp.f           |    4 +-
 src/determinants/spindeterminants.irp.f       |   10 +-
 src/fci/EZFIO.cfg                             |   13 +
 src/fci/NEED                                  |    7 +
 src/fci/README.rst                            |  112 ++
 src/fci/energy.irp.f                          |   33 +
 src/fci/fci.irp.f                             |  147 ++
 src/fci/pt2.irp.f                             |   40 +
 src/fci/pt2_stoch_routines.irp.f              |  669 ++++++++
 src/fci/run_pt2_slave.irp.f                   |  247 +++
 src/fci/run_selection_slave.irp.f             |  254 +++
 src/fci/selection.irp.f                       | 1371 +++++++++++++++++
 src/fci/selection_buffer.irp.f                |  303 ++++
 src/fci/selection_types.f90                   |    9 +
 src/fci/zmq_selection.irp.f                   |  219 +++
 src/four_idx/NEED                             |    1 +
 src/four_idx/README.rst                       |    6 +
 src/four_idx/four_idx_transform.irp.f         |   12 +
 src/hartree_fock/EZFIO.cfg                    |   53 +
 src/hartree_fock/NEED                         |    4 +
 src/hartree_fock/README.rst                   |   33 +
 src/hartree_fock/diagonalize_fock.irp.f       |  100 ++
 src/hartree_fock/diis.irp.f                   |  139 ++
 src/hartree_fock/fock_matrix.irp.f            |  320 ++++
 src/hartree_fock/hf_density_matrix_ao.irp.f   |   41 +
 src/hartree_fock/huckel.irp.f                 |   34 +
 src/hartree_fock/roothaan_hall_scf.irp.f      |  289 ++++
 src/hartree_fock/scf.irp.f                    |   60 +
 .../{H_apply.irp.f => h_apply.irp.f}          |    0
 src/perturbation/perturbation.irp.f           |    2 +-
 ...nearAlgebra.irp.f => linear_algebra.irp.f} |    0
 50 files changed, 4542 insertions(+), 26 deletions(-)
 rename src/cis/{CIS.irp.f => cis.irp.f} (100%)
 rename src/cis/{H_apply.irp.f => h_apply.irp.f} (100%)
 rename src/cisd/{CISD.irp.f => cisd.irp.f} (100%)
 rename src/cisd/{H_apply.irp.f => h_apply.irp.f} (100%)
 rename src/davidson/{diagonalize_CI.irp.f => diagonalize_ci.irp.f} (100%)
 rename src/davidson/{u0Hu0.irp.f => u0_h_u0.irp.f} (100%)
 rename src/davidson_dressed/{diagonalize_CI.irp.f => diagonalize_ci.irp.f} (100%)
 rename src/determinants/{Fock_diag.irp.f => fock_diag.irp.f} (100%)
 rename src/determinants/{H_apply.irp.f => h_apply.irp.f} (100%)
 rename src/determinants/{H_apply.template.f => h_apply.template.f} (100%)
 rename src/determinants/{H_apply_nozmq.template.f => h_apply_nozmq.template.f} (100%)
 rename src/determinants/{H_apply_zmq.template.f => h_apply_zmq.template.f} (100%)
 create mode 100644 src/fci/EZFIO.cfg
 create mode 100644 src/fci/NEED
 create mode 100644 src/fci/README.rst
 create mode 100644 src/fci/energy.irp.f
 create mode 100644 src/fci/fci.irp.f
 create mode 100644 src/fci/pt2.irp.f
 create mode 100644 src/fci/pt2_stoch_routines.irp.f
 create mode 100644 src/fci/run_pt2_slave.irp.f
 create mode 100644 src/fci/run_selection_slave.irp.f
 create mode 100644 src/fci/selection.irp.f
 create mode 100644 src/fci/selection_buffer.irp.f
 create mode 100644 src/fci/selection_types.f90
 create mode 100644 src/fci/zmq_selection.irp.f
 create mode 100644 src/four_idx/NEED
 create mode 100644 src/four_idx/README.rst
 create mode 100644 src/four_idx/four_idx_transform.irp.f
 create mode 100644 src/hartree_fock/EZFIO.cfg
 create mode 100644 src/hartree_fock/NEED
 create mode 100644 src/hartree_fock/README.rst
 create mode 100644 src/hartree_fock/diagonalize_fock.irp.f
 create mode 100644 src/hartree_fock/diis.irp.f
 create mode 100644 src/hartree_fock/fock_matrix.irp.f
 create mode 100644 src/hartree_fock/hf_density_matrix_ao.irp.f
 create mode 100644 src/hartree_fock/huckel.irp.f
 create mode 100644 src/hartree_fock/roothaan_hall_scf.irp.f
 create mode 100644 src/hartree_fock/scf.irp.f
 rename src/mrpt_utils/{H_apply.irp.f => h_apply.irp.f} (100%)
 rename src/utils/{LinearAlgebra.irp.f => linear_algebra.irp.f} (100%)

diff --git a/scripts/generate_h_apply.py b/scripts/generate_h_apply.py
index a100dd88..a5f3049d 100755
--- a/scripts/generate_h_apply.py
+++ b/scripts/generate_h_apply.py
@@ -61,10 +61,10 @@ subroutine
 class H_apply(object):
 
   def read_template(self):
-    file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply.template.f','r')
+    file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply.template.f','r')
     self.template = file.read()
     file.close()
-    file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply_nozmq.template.f','r')
+    file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply_nozmq.template.f','r')
     self.template += file.read()
     file.close()
 
@@ -439,10 +439,10 @@ class H_apply(object):
 class H_apply_zmq(H_apply):
 
   def read_template(self):
-    file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply.template.f','r')
+    file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply.template.f','r')
     self.template = file.read()
     file.close()
-    file = open(os.environ["QP_ROOT"]+'/src/Determinants/H_apply_zmq.template.f','r')
+    file = open(os.environ["QP_ROOT"]+'/src/determinants/h_apply_zmq.template.f','r')
     self.template += file.read()
     file.close()
 
diff --git a/scripts/perturbation.py b/scripts/perturbation.py
index 1639bd00..225fcc1d 100755
--- a/scripts/perturbation.py
+++ b/scripts/perturbation.py
@@ -2,7 +2,7 @@
 
 import os
 
-Pert_dir = os.environ["QP_ROOT"]+"/src/Perturbation/"
+Pert_dir = os.environ["QP_ROOT"]+"/src/perturbation/"
 
 perturbations = []
 
diff --git a/src/ao_basis/dimensions_integrals.irp.f b/src/ao_basis/dimensions_integrals.irp.f
index 9bfd41b7..97fd83e1 100644
--- a/src/ao_basis/dimensions_integrals.irp.f
+++ b/src/ao_basis/dimensions_integrals.irp.f
@@ -6,13 +6,13 @@
  END_DOC
  integer :: n_pt_sup
  integer :: prim_power_l_max
- include 'Utils/constants.include.F'
+ include 'utils/constants.include.F'
  prim_power_l_max = maxval(ao_power)
  n_pt_max_integrals = 24 * prim_power_l_max + 4
  n_pt_max_i_x = 8 * prim_power_l_max
  ASSERT (n_pt_max_i_x-1 <= max_dim)
  if (n_pt_max_i_x-1 > max_dim) then
-   print *, 'Increase max_dim in Utils/constants.include.F to ', n_pt_max_i_x-1
+   print *, 'Increase max_dim in utils/constants.include.F to ', n_pt_max_i_x-1
    stop 1
  endif
 END_PROVIDER
diff --git a/src/ao_one_e_integrals/pot_ao_ints.irp.f b/src/ao_one_e_integrals/pot_ao_ints.irp.f
index 3c99aec0..2229d2ec 100644
--- a/src/ao_one_e_integrals/pot_ao_ints.irp.f
+++ b/src/ao_one_e_integrals/pot_ao_ints.irp.f
@@ -169,7 +169,7 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b
   double precision               :: V_e_n,const_factor,dist_integral,tmp
   double precision               :: accu,epsilo,rint
   integer                        :: n_pt_out,lmax
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   if ( (A_center(1)/=B_center(1)).or.                                &
         (A_center(2)/=B_center(2)).or.                               &
         (A_center(3)/=B_center(3)).or.                               &
@@ -371,7 +371,7 @@ recursive subroutine I_x1_pol_mult_mono_elec(a,c,R1x,R1xp,R2x,d,nd,n_pt_in)
   integer,intent(inout)          :: nd
   integer, intent(in)            :: a,c
   double precision, intent(in)   :: R1x(0:2),R1xp(0:2),R2x(0:2)
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   double precision               :: X(0:max_dim)
   double precision               :: Y(0:max_dim)
   !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: X, Y
@@ -436,7 +436,7 @@ recursive subroutine I_x2_pol_mult_mono_elec(c,R1x,R1xp,R2x,d,nd,dim)
 !  Recursive routine involved in the electron-nucleus potential
   END_DOC
   integer , intent(in)           :: dim
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   double precision               :: d(0:max_dim)
   integer,intent(inout)          :: nd
   integer, intent(in)            :: c
@@ -509,7 +509,7 @@ double precision function int_gaus_pol(alpha,n)
   double precision               :: alpha
   integer                        :: n
   double precision               :: dble_fact
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   
   int_gaus_pol = 0.d0
   if(iand(n,1).eq.0)then
@@ -535,7 +535,7 @@ double precision function V_r(n,alpha)
   END_DOC
   double precision               :: alpha, fact
   integer                        :: n
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   if(iand(n,1).eq.1)then
     V_r = 0.5d0 * fact(shiftr(n,1)) / (alpha ** (shiftr(n,1) + 1))
   else
@@ -570,7 +570,7 @@ double precision function V_theta(n,m)
   END_DOC
   integer                        :: n,m,i
   double precision               :: Wallis, prod
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   V_theta = 0.d0
   prod = 1.d0
   do i = 0,shiftr(n,1)-1
@@ -589,7 +589,7 @@ double precision function Wallis(n)
   END_DOC
   double precision               :: fact
   integer                        :: n,p
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   if(iand(n,1).eq.0)then
     Wallis = fact(shiftr(n,1))
     Wallis = pi * fact(n) / (dble(ibset(0_8,n)) * (Wallis+Wallis)*Wallis)
diff --git a/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f b/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f
index 807716d8..95703380 100644
--- a/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f
+++ b/src/ao_one_e_integrals/pot_ao_pseudo_ints.irp.f
@@ -32,7 +32,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)]
   BEGIN_DOC
   ! Local pseudo-potential
   END_DOC
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   double precision               :: alpha, beta, gama, delta
   integer                        :: num_A,num_B
   double precision               :: A_center(3),B_center(3),C_center(3)
@@ -139,7 +139,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integral_local, (ao_num,ao_num)]
   BEGIN_DOC
   ! Non-local pseudo-potential
   END_DOC
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   double precision               :: alpha, beta, gama, delta
   integer                        :: num_A,num_B
   double precision               :: A_center(3),B_center(3),C_center(3)
diff --git a/src/bitmask/bitmasks.irp.f b/src/bitmask/bitmasks.irp.f
index d121ad25..918b38ed 100644
--- a/src/bitmask/bitmasks.irp.f
+++ b/src/bitmask/bitmasks.irp.f
@@ -2,7 +2,7 @@ use bitmasks
 
 BEGIN_PROVIDER [ integer, N_int ]
   implicit none
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   BEGIN_DOC
   ! Number of 64-bit integers needed to represent determinants as binary strings
   END_DOC
diff --git a/src/cis/CIS.irp.f b/src/cis/cis.irp.f
similarity index 100%
rename from src/cis/CIS.irp.f
rename to src/cis/cis.irp.f
diff --git a/src/cis/H_apply.irp.f b/src/cis/h_apply.irp.f
similarity index 100%
rename from src/cis/H_apply.irp.f
rename to src/cis/h_apply.irp.f
diff --git a/src/cisd/CISD.irp.f b/src/cisd/cisd.irp.f
similarity index 100%
rename from src/cisd/CISD.irp.f
rename to src/cisd/cisd.irp.f
diff --git a/src/cisd/H_apply.irp.f b/src/cisd/h_apply.irp.f
similarity index 100%
rename from src/cisd/H_apply.irp.f
rename to src/cisd/h_apply.irp.f
diff --git a/src/davidson/diagonalize_CI.irp.f b/src/davidson/diagonalize_ci.irp.f
similarity index 100%
rename from src/davidson/diagonalize_CI.irp.f
rename to src/davidson/diagonalize_ci.irp.f
diff --git a/src/davidson/u0Hu0.irp.f b/src/davidson/u0_h_u0.irp.f
similarity index 100%
rename from src/davidson/u0Hu0.irp.f
rename to src/davidson/u0_h_u0.irp.f
diff --git a/src/davidson_dressed/diagonalize_CI.irp.f b/src/davidson_dressed/diagonalize_ci.irp.f
similarity index 100%
rename from src/davidson_dressed/diagonalize_CI.irp.f
rename to src/davidson_dressed/diagonalize_ci.irp.f
diff --git a/src/determinants/Fock_diag.irp.f b/src/determinants/fock_diag.irp.f
similarity index 100%
rename from src/determinants/Fock_diag.irp.f
rename to src/determinants/fock_diag.irp.f
diff --git a/src/determinants/H_apply.irp.f b/src/determinants/h_apply.irp.f
similarity index 100%
rename from src/determinants/H_apply.irp.f
rename to src/determinants/h_apply.irp.f
diff --git a/src/determinants/H_apply.template.f b/src/determinants/h_apply.template.f
similarity index 100%
rename from src/determinants/H_apply.template.f
rename to src/determinants/h_apply.template.f
diff --git a/src/determinants/H_apply_nozmq.template.f b/src/determinants/h_apply_nozmq.template.f
similarity index 100%
rename from src/determinants/H_apply_nozmq.template.f
rename to src/determinants/h_apply_nozmq.template.f
diff --git a/src/determinants/H_apply_zmq.template.f b/src/determinants/h_apply_zmq.template.f
similarity index 100%
rename from src/determinants/H_apply_zmq.template.f
rename to src/determinants/h_apply_zmq.template.f
diff --git a/src/determinants/s2.irp.f b/src/determinants/s2.irp.f
index a473bd40..cda1404d 100644
--- a/src/determinants/s2.irp.f
+++ b/src/determinants/s2.irp.f
@@ -1,7 +1,7 @@
 double precision function diag_S_mat_elem(key_i,Nint)
   implicit none
   use bitmasks
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
 
   integer                        :: Nint
   integer(bit_kind), intent(in)  :: key_i(Nint,2)
diff --git a/src/determinants/slater_rules.irp.f b/src/determinants/slater_rules.irp.f
index 901ff734..aefbdbdb 100644
--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@@ -1,6 +1,6 @@
 subroutine get_excitation_degree(key1,key2,degree,Nint)
   use bitmasks
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   implicit none
   BEGIN_DOC
   ! Returns the excitation degree between two determinants
@@ -1834,7 +1834,7 @@ end
 
 subroutine get_excitation_degree_spin(key1,key2,degree,Nint)
   use bitmasks
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   implicit none
   BEGIN_DOC
   ! Returns the excitation degree between two determinants
diff --git a/src/determinants/spindeterminants.irp.f b/src/determinants/spindeterminants.irp.f
index 06da6a31..675b6e60 100644
--- a/src/determinants/spindeterminants.irp.f
+++ b/src/determinants/spindeterminants.irp.f
@@ -961,7 +961,7 @@ subroutine get_all_spin_singles_and_doubles_1(buffer, idx, spindet, size_buffer,
   integer, intent(out)           :: n_doubles
 
   integer                        :: i
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   integer                        :: degree
 
 
@@ -1000,7 +1000,7 @@ subroutine get_all_spin_singles_1(buffer, idx, spindet, size_buffer, singles, n_
   integer, intent(out)           :: n_singles
   integer                        :: i
   integer                        :: degree
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
 
   n_singles = 1
   do i=1,size_buffer
@@ -1030,7 +1030,7 @@ subroutine get_all_spin_doubles_1(buffer, idx, spindet, size_buffer, doubles, n_
   integer, intent(out)           :: doubles(size_buffer)
   integer, intent(out)           :: n_doubles
   integer                        :: i
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   integer                        :: degree
 
   n_doubles = 1
@@ -1123,7 +1123,7 @@ subroutine get_all_spin_singles_$N_int(buffer, idx, spindet, size_buffer, single
   integer, intent(out)           :: n_singles
 
   integer                        :: i,k
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   integer(bit_kind)              :: xorvec($N_int)
   integer                        :: degree
 
@@ -1173,7 +1173,7 @@ subroutine get_all_spin_doubles_$N_int(buffer, idx, spindet, size_buffer, double
   integer, intent(out)           :: n_doubles
 
   integer                        :: i,k, degree
-  include 'Utils/constants.include.F'
+  include 'utils/constants.include.F'
   integer(bit_kind)              :: xorvec($N_int)
 
   n_doubles = 1
diff --git a/src/fci/EZFIO.cfg b/src/fci/EZFIO.cfg
new file mode 100644
index 00000000..d5526673
--- /dev/null
+++ b/src/fci/EZFIO.cfg
@@ -0,0 +1,13 @@
+[energy]
+type: double precision
+doc: Calculated Selected |FCI| energy
+interface: ezfio
+size: (determinants.n_states)
+
+[energy_pt2]
+type: double precision
+doc: Calculated |FCI| energy + |PT2|
+interface: ezfio
+size: (determinants.n_states)
+
+
diff --git a/src/fci/NEED b/src/fci/NEED
new file mode 100644
index 00000000..f7a3a46f
--- /dev/null
+++ b/src/fci/NEED
@@ -0,0 +1,7 @@
+perturbation
+selectors_full
+generators_full
+zmq
+mpi
+davidson_undressed
+iterations
diff --git a/src/fci/README.rst b/src/fci/README.rst
new file mode 100644
index 00000000..3bb6fb24
--- /dev/null
+++ b/src/fci/README.rst
@@ -0,0 +1,112 @@
+===
+FCI
+===
+
+Selected Full Configuration Interaction.
+
+The :command:`FCI` program starts with a single determinant, or with the wave
+function in the |EZFIO| database if :option:`determinants read_wf` is |true|.
+Then, it will iteratively:
+
+* Select the most important determinants from the external space and add them to the
+  internal space
+* If :option:`determinants s2_eig` is |true|, add all the necessary
+  determinants to allow the eigenstates of |H| to be eigenstates of |S^2|
+* Diagonalize |H| in the enlarged internal space
+* Compute (stochastically) the second-order perturbative contribution to the energy 
+* Extrapolate the variational energy by fitting
+  :math:`E=E_\text{FCI} - \alpha\, E_\text{PT2}`
+
+
+The number of selected determinants at each iteration will be such that the
+size of the wave function will double at every iteration. If :option:`determinants
+s2_eig` is |true|, then the number of selected determinants will be 1.5x the
+current number, and then all the additional determinants will be added.
+
+By default, the program will stop when more than one million determinants have
+been selected, or when the |PT2| energy is below :math:`10^{-4}`.
+
+The variational and |PT2| energies of the iterations are stored in the
+|EZFIO| database, in the :ref:`IterativeSave` module.
+
+
+
+Computation of the |PT2| energy
+-------------------------------
+
+At each iteration, the |PT2| energy is computed considering the Epstein-Nesbet
+zeroth-order Hamiltonian:
+
+.. math::
+
+  E_{\text{PT2}} = \sum_{ \alpha }
+    \frac{|\langle \Psi_S | \hat{H} | \alpha \rangle|^2}
+         {E - \langle \alpha | \hat{H} | \alpha \rangle}
+
+where the |kalpha| determinants are generated by applying all the single and
+double excitation operators to all the determinants of the wave function
+:math:`\Psi_G`.
+
+When the hybrid-deterministic/stochastic algorithm is chosen
+(default), :math:`Psi_G = \Psi_S = \Psi`, the full wavefunction expanded in the
+internal space.
+When the deterministic algorithm is chosen (:option:`perturbation do_pt2`
+is set to |false|), :math:`Psi_G` is a truncation of |Psi| using
+:option:`determinants threshold_generators`, and :math:`Psi_S` is a truncation
+of |Psi| using :option:`determinants threshold_selectors`, and re-weighted
+by :math:`1/\langle \Psi_s | \Psi_s \rangle`. 
+
+At every iteration, while computing the |PT2|, the variance of the wave
+function is also computed:
+
+.. math:: 
+
+  \sigma^2 & = \langle \Psi | \hat{H}^2 | \Psi \rangle -
+               \langle  \Psi | \hat{H}   | \Psi \rangle^2 \\
+           & = \sum_{i \in \text{FCI}}
+               \langle \Psi | \hat{H} | i \rangle 
+               \langle i | \hat{H} | \Psi \rangle -
+               \langle  \Psi | \hat{H} | \Psi \rangle^2 \\
+           & = \sum_{ \alpha }
+               \langle |\Psi | \hat{H} | \alpha \rangle|^2.
+
+The expression of the variance is the same as the expression of the |PT2|, with
+a denominator of 1. It measures how far the wave function is from the |FCI|
+solution. Note that the absence of denominator in the Heat-Bath selected |CI|
+method is selection method by minimization of the variance, whereas |CIPSI| is
+a selection method by minimization of the energy.
+
+
+If :option:`perturbation do_pt2` is set to |false|, then the stochastic
+|PT2| is not computed, and an approximate value is obtained from the |CIPSI|
+selection. The calculation is faster, but the extrapolated |FCI| value is 
+less accurate. This way of running the code should be used when the only
+goal is to generate a wave function, as for using |CIPSI| wave functions as
+trial wave functions of |QMC| calculations for example.
+
+
+The :command:`PT2` program reads the wave function of the |EZFIO| database
+and computes the energy and the |PT2| contribution.
+
+
+State-averaging
+---------------
+
+Extrapolated |FCI| energy
+-------------------------
+
+An estimate of the |FCI| energy is computed by extrapolating
+
+.. math::
+
+  E=E_\text{FCI} - \alpha\, E_\text{PT2}
+
+This extrapolation is done for all the requested states, and excitation
+energies are printed as energy differences between the extrapolated
+energies of the excited states and the extrapolated energy of the ground
+state.
+
+The extrapolations are given considering the 2 last points, the 3 last points, ...,
+the 7 last points. The extrapolated value should be chosen such that the extrpolated
+value is stable with the number of points.
+
diff --git a/src/fci/energy.irp.f b/src/fci/energy.irp.f
new file mode 100644
index 00000000..b7ba42bb
--- /dev/null
+++ b/src/fci/energy.irp.f
@@ -0,0 +1,33 @@
+BEGIN_PROVIDER [ logical, initialize_pt2_E0_denominator ]
+ implicit none
+ BEGIN_DOC
+ ! If true, initialize pt2_E0_denominator
+ END_DOC
+ initialize_pt2_E0_denominator = .True.
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, pt2_E0_denominator, (N_states) ]
+ implicit none
+ BEGIN_DOC
+ ! E0 in the denominator of the PT2
+ END_DOC
+ if (initialize_pt2_E0_denominator) then
+   if (h0_type == "EN") then
+     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+   else if (h0_type == "Barycentric") then
+     pt2_E0_denominator(1:N_states) = barycentric_electronic_energy(1:N_states)
+   else if (h0_type == "Variance") then
+     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states) !1.d0-nuclear_repulsion
+   else if (h0_type == "SOP") then
+     pt2_E0_denominator(1:N_states) = psi_energy(1:N_states)
+   else
+     print *,  h0_type, ' not implemented'
+     stop
+   endif
+  call write_double(6,pt2_E0_denominator(1)+nuclear_repulsion, 'PT2 Energy denominator')
+ else
+   pt2_E0_denominator = -huge(1.d0)
+ endif
+END_PROVIDER
+
+
diff --git a/src/fci/fci.irp.f b/src/fci/fci.irp.f
new file mode 100644
index 00000000..01cca708
--- /dev/null
+++ b/src/fci/fci.irp.f
@@ -0,0 +1,147 @@
+program fci_zmq
+  implicit none
+  integer                        :: i,j,k
+  double precision, allocatable  :: pt2(:), variance(:), norm(:), rpt2(:)
+  integer                        :: n_det_before, to_select
+  
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = memory_of_double(N_states)*4.d0
+  call check_mem(rss,irp_here)
+
+  allocate (pt2(N_states), rpt2(N_states), norm(N_states), variance(N_states))
+
+  double precision               :: hf_energy_ref
+  logical                        :: has
+  double precision               :: relative_error
+
+  PROVIDE H_apply_buffer_allocated
+
+  relative_error=PT2_relative_error
+
+  pt2 = -huge(1.e0)
+  rpt2 = -huge(1.e0)
+  norm = 0.d0
+  variance = 0.d0
+
+  if (s2_eig) then
+    call make_s2_eigenfunction
+  endif
+  call diagonalize_CI
+  call save_wavefunction
+  
+  call ezfio_has_hartree_fock_energy(has)
+  if (has) then
+    call ezfio_get_hartree_fock_energy(hf_energy_ref)
+  else
+    hf_energy_ref = ref_bitmask_energy
+  endif
+
+  if (N_det > N_det_max) then
+    psi_det = psi_det_sorted
+    psi_coef = psi_coef_sorted
+    N_det = N_det_max
+    soft_touch N_det psi_det psi_coef
+    call diagonalize_CI
+    call save_wavefunction
+  endif
+  
+  n_det_before = 0
+
+  double precision :: correlation_energy_ratio
+  double precision :: threshold_selectors_save, threshold_generators_save
+  threshold_selectors_save  = threshold_selectors
+  threshold_generators_save = threshold_generators
+  double precision :: error(N_states)
+
+  correlation_energy_ratio = 0.d0
+
+  do while (                                                         &
+        (N_det < N_det_max) .and.                                    &
+        (maxval(abs(pt2(1:N_states))) > pt2_max) .and.               &
+        (correlation_energy_ratio <= correlation_energy_ratio_max)    &
+        )
+      write(*,'(A)')  '--------------------------------------------------------------------------------'
+
+
+    if (do_pt2) then
+      pt2 = 0.d0
+      variance = 0.d0
+      norm = 0.d0
+      threshold_selectors = 1.d0
+      threshold_generators = 1.d0 
+      SOFT_TOUCH threshold_selectors threshold_generators
+      call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, norm) ! Stochastic PT2
+      threshold_selectors = threshold_selectors_save
+      threshold_generators = threshold_generators_save
+      SOFT_TOUCH threshold_selectors threshold_generators
+    endif
+
+
+    correlation_energy_ratio = (psi_energy_with_nucl_rep(1) - hf_energy_ref)  /     &
+                    (psi_energy_with_nucl_rep(1) + pt2(1) - hf_energy_ref)
+    correlation_energy_ratio = min(1.d0,correlation_energy_ratio)
+
+    call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep+pt2)
+    call write_double(6,correlation_energy_ratio, 'Correlation ratio')
+    call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm)
+
+    do k=1,N_states
+      rpt2(:) = pt2(:)/(1.d0 + norm(k)) 
+    enddo
+
+    call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) 
+    call print_extrapolated_energy(psi_energy_with_nucl_rep(1:N_states),rpt2)
+    N_iter += 1
+
+    n_det_before = N_det
+    to_select = N_det
+    to_select = max(N_states_diag, to_select)
+    to_select = min(to_select, N_det_max-n_det_before)
+    call ZMQ_selection(to_select, pt2, variance, norm)
+    
+    PROVIDE  psi_coef
+    PROVIDE  psi_det
+    PROVIDE  psi_det_sorted
+
+    call diagonalize_CI
+    call save_wavefunction
+    call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states))
+  enddo
+
+  if (N_det < N_det_max) then
+      call diagonalize_CI
+      call save_wavefunction
+      call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states))
+      call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep(1:N_states)+pt2)
+  endif
+
+  if (do_pt2) then
+    pt2 = 0.d0
+    variance = 0.d0
+    norm = 0.d0
+    threshold_selectors = 1.d0
+    threshold_generators = 1d0 
+    SOFT_TOUCH threshold_selectors threshold_generators
+    call ZMQ_pt2(psi_energy_with_nucl_rep, pt2,relative_error,error,variance,norm) ! Stochastic PT2
+    threshold_selectors = threshold_selectors_save
+    threshold_generators = threshold_generators_save
+    SOFT_TOUCH threshold_selectors threshold_generators
+    call ezfio_set_fci_energy(psi_energy_with_nucl_rep(1:N_states))
+    call ezfio_set_fci_energy_pt2(psi_energy_with_nucl_rep(1:N_states)+pt2)
+  endif
+  print *,  'N_det             = ', N_det
+  print *,  'N_sop             = ', N_occ_pattern
+  print *,  'N_states          = ', N_states
+  print*,   'correlation_ratio = ', correlation_energy_ratio
+
+
+  do k=1,N_states
+    rpt2(:) = pt2(:)/(1.d0 + norm(k)) 
+  enddo
+
+  call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm)
+  call save_iterations(psi_energy_with_nucl_rep(1:N_states),rpt2,N_det) 
+  call print_extrapolated_energy(psi_energy_with_nucl_rep(1:N_states),rpt2)
+
+end
diff --git a/src/fci/pt2.irp.f b/src/fci/pt2.irp.f
new file mode 100644
index 00000000..c0c2e01d
--- /dev/null
+++ b/src/fci/pt2.irp.f
@@ -0,0 +1,40 @@
+program pt2_stoch
+  implicit none
+  read_wf = .True.
+  SOFT_TOUCH read_wf
+  PROVIDE mo_bielec_integrals_in_map
+  PROVIDE psi_energy
+  call run
+end
+
+subroutine run
+  implicit none
+  integer                        :: i,j,k
+  logical, external              :: detEq
+  
+  double precision               :: pt2(N_states)
+  integer                        :: degree
+  integer                        :: n_det_before, to_select
+  double precision               :: threshold_davidson_in
+  
+  double precision               :: E_CI_before(N_states), relative_error, error(N_states), variance(N_states), norm(N_states), rpt2(N_states)
+  
+  pt2(:) = 0.d0
+  
+  E_CI_before(:) = psi_energy(:) + nuclear_repulsion
+  threshold_selectors = 1.d0
+  threshold_generators = 1.d0
+  relative_error=PT2_relative_error
+  
+  call ZMQ_pt2(psi_energy_with_nucl_rep,pt2,relative_error,error, variance, norm) ! Stochastic PT2
+  do k=1,N_states
+    rpt2(:) = pt2(:)/(1.d0 + norm(k)) 
+  enddo
+
+  call print_summary(psi_energy_with_nucl_rep(1:N_states),pt2,error,variance,norm)
+
+  call ezfio_set_fci_energy(E_CI_before)
+  call ezfio_set_fci_energy_pt2(E_CI_before+pt2)
+end
+
+
diff --git a/src/fci/pt2_stoch_routines.irp.f b/src/fci/pt2_stoch_routines.irp.f
new file mode 100644
index 00000000..a777f353
--- /dev/null
+++ b/src/fci/pt2_stoch_routines.irp.f
@@ -0,0 +1,669 @@
+BEGIN_PROVIDER [ integer, pt2_stoch_istate ]
+ implicit none
+ BEGIN_DOC
+ ! State for stochatsic PT2 
+ END_DOC
+ pt2_stoch_istate = 1
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_F, (N_det_generators) ]
+&BEGIN_PROVIDER [ integer, pt2_n_tasks_max ]
+  implicit none
+  logical, external :: testTeethBuilding
+  integer :: i
+  integer :: e
+  e = elec_num - n_core_orb * 2
+  pt2_n_tasks_max = 1+min((e*(e-1))/2, int(dsqrt(dble(N_det_selectors)))/10)
+  do i=1,N_det_generators
+    pt2_F(i) = 1 + int(dble(pt2_n_tasks_max)*maxval(dsqrt(dabs(psi_coef_sorted_gen(i,1:N_states)))))
+  enddo
+END_PROVIDER
+
+ BEGIN_PROVIDER [ integer, pt2_N_teeth ]
+&BEGIN_PROVIDER [ integer, pt2_minDetInFirstTeeth ]
+  implicit none
+  logical, external :: testTeethBuilding
+  
+  if(N_det_generators < 1024) then
+    pt2_minDetInFirstTeeth = 1
+    pt2_N_teeth = 1
+  else
+    pt2_minDetInFirstTeeth = min(5, N_det_generators)
+    do pt2_N_teeth=100,2,-1
+      if(testTeethBuilding(pt2_minDetInFirstTeeth, pt2_N_teeth)) exit
+    end do
+  end if
+  call write_int(6,pt2_N_teeth,'Number of comb teeth')
+END_PROVIDER
+
+
+logical function testTeethBuilding(minF, N)
+  implicit none
+  integer, intent(in) :: minF, N
+  integer :: n0, i
+  double precision :: u0, Wt, r
+  
+  double precision, allocatable :: tilde_w(:), tilde_cW(:)
+  integer, external :: dress_find_sample
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  rss = memory_of_double(2*N_det_generators+1)
+  call check_mem(rss,irp_here)
+
+  allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+  
+  do i=1,N_det_generators
+    tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+  enddo
+
+  double precision :: norm
+  norm = 0.d0
+  do i=N_det_generators,1,-1
+    norm += tilde_w(i) 
+  enddo
+
+  tilde_w(:) = tilde_w(:) / norm
+
+  tilde_cW(0) = -1.d0
+  do i=1,N_det_generators
+    tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+  enddo
+  tilde_cW(:) = tilde_cW(:) + 1.d0
+
+  n0 = 0
+  testTeethBuilding = .false.
+  do
+    u0 = tilde_cW(n0)
+    r = tilde_cW(n0 + minF)
+    Wt = (1d0 - u0) / dble(N)
+    if (dabs(Wt) <= 1.d-3) then
+      return
+    endif
+    if(Wt >= r - u0) then
+       testTeethBuilding = .true.
+       return
+    end if
+    n0 += 1
+    if(N_det_generators - n0 < minF * N) then
+      return
+    end if
+  end do
+  stop "exited testTeethBuilding"
+end function
+
+
+
+subroutine ZMQ_pt2(E, pt2,relative_error, error, variance, norm)
+  use f77_zmq
+  use selection_types
+  
+  implicit none
+  
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket, zmq_socket_pull
+  integer, external              :: omp_get_thread_num
+  double precision, intent(in)   :: relative_error, E(N_states)
+  double precision, intent(out)  :: pt2(N_states),error(N_states)
+  double precision, intent(out)  :: variance(N_states),norm(N_states)
+  
+  
+  integer                        :: i
+  
+  double precision, external     :: omp_get_wtime
+  double precision               :: state_average_weight_save(N_states), w(N_states,4)
+  integer(ZMQ_PTR), external     :: new_zmq_to_qp_run_socket
+  
+  if (N_det < max(10,N_states)) then
+    pt2=0.d0
+    variance=0.d0
+    norm=0.d0
+    call ZMQ_selection(0, pt2, variance, norm)
+    error(:) = 0.d0
+  else
+    
+    state_average_weight_save(:) = state_average_weight(:)
+    do pt2_stoch_istate=1,N_states
+      state_average_weight(:) = 0.d0
+      state_average_weight(pt2_stoch_istate) = 1.d0
+      TOUCH state_average_weight pt2_stoch_istate
+
+      provide nproc pt2_F mo_bielec_integrals_in_map mo_mono_elec_integral pt2_w psi_selectors 
+      call new_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+
+      integer, external              :: zmq_put_psi
+      integer, external              :: zmq_put_N_det_generators
+      integer, external              :: zmq_put_N_det_selectors
+      integer, external              :: zmq_put_dvector
+      integer, external              :: zmq_put_ivector
+      if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+        stop 'Unable to put psi on ZMQ server'
+      endif
+      if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_generators on ZMQ server'
+      endif
+      if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+        stop 'Unable to put N_det_selectors on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+        stop 'Unable to put energy on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+        stop 'Unable to put state_average_weight on ZMQ server'
+      endif
+      if (zmq_put_ivector(zmq_to_qp_run_socket,1,'pt2_stoch_istate',pt2_stoch_istate,1) == -1) then
+        stop 'Unable to put pt2_stoch_istate on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then
+        stop 'Unable to put threshold_selectors on ZMQ server'
+      endif
+      if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then
+        stop 'Unable to put threshold_generators on ZMQ server'
+      endif
+
+
+
+      integer, external :: add_task_to_taskserver
+      character(400000) :: task
+
+      integer :: j,k,ipos,ifirst
+      ifirst=0
+
+      ipos=0
+      do i=1,N_det_generators
+        if (pt2_F(i) > 1) then
+          ipos += 1
+        endif
+      enddo
+      call write_int(6,sum(pt2_F),'Number of tasks')
+      call write_int(6,ipos,'Number of fragmented tasks')
+
+      ipos=1
+      do i= 1, N_det_generators
+        do j=1,pt2_F(pt2_J(i))
+          write(task(ipos:ipos+20),'(I9,1X,I9,''|'')') j, pt2_J(i)
+          ipos += 20
+          if (ipos > 400000-20) then
+            if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+              stop 'Unable to add task to task server'
+            endif
+            ipos=1
+            if (ifirst == 0) then
+              ifirst=1
+              if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+                print *,  irp_here, ': Failed in zmq_set_running'
+              endif
+            endif
+          endif
+        end do
+      enddo
+      if (ipos > 1) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+      endif
+      
+      integer, external :: zmq_set_running
+      if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+        print *,  irp_here, ': Failed in zmq_set_running'
+      endif
+
+      
+      integer :: nproc_target
+      nproc_target = nproc
+      double precision :: mem
+      mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 +  3.d0 + 5.d0) / (1024.d0**3)
+      call write_double(6,mem,'Estimated memory/thread (Gb)')
+      if (qp_max_mem > 0) then
+        nproc_target = max(1,int(dble(qp_max_mem)/mem))
+        nproc_target = min(nproc_target,nproc)
+      endif
+
+      call omp_set_nested(.false.)
+
+      
+      print '(A)', '========== ================= =========== =============== =============== ================='
+      print '(A)', ' Samples        Energy        Stat. Err     Variance          Norm          Seconds      '
+      print '(A)', '========== ================= =========== =============== =============== ================='
+      
+      !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(nproc_target+1)            &
+          !$OMP  PRIVATE(i)
+      i = omp_get_thread_num()
+      if (i==0) then
+
+        call pt2_collector(zmq_socket_pull, E(pt2_stoch_istate),relative_error, w(1,1), w(1,2), w(1,3), w(1,4))
+        pt2(pt2_stoch_istate) = w(pt2_stoch_istate,1)
+        error(pt2_stoch_istate) = w(pt2_stoch_istate,2)
+        variance(pt2_stoch_istate) = w(pt2_stoch_istate,3)
+        norm(pt2_stoch_istate) = w(pt2_stoch_istate,4)
+
+      else
+        call pt2_slave_inproc(i)
+      endif
+      !$OMP END PARALLEL
+      call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
+      
+      print '(A)', '========== ================= =========== =============== =============== ================='
+      
+    enddo
+!    call omp_set_nested(.false.)
+
+    FREE pt2_stoch_istate
+    state_average_weight(:) = state_average_weight_save(:)
+    TOUCH state_average_weight
+  endif
+  do k=N_det+1,N_states
+    pt2(k) = 0.d0
+  enddo
+
+end subroutine
+
+
+subroutine pt2_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  call run_pt2_slave(1,i,pt2_e0_denominator)
+end
+
+
+subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2, error, variance, norm)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+  
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  double precision, intent(in) :: relative_error, E
+  double precision, intent(out)  :: pt2(N_states), error(N_states)
+  double precision, intent(out)  :: variance(N_states), norm(N_states)
+
+
+  double precision, allocatable      :: eI(:,:), eI_task(:,:), S(:), S2(:)
+  double precision, allocatable      :: vI(:,:), vI_task(:,:), T2(:)
+  double precision, allocatable      :: nI(:,:), nI_task(:,:), T3(:)
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+  integer, external :: zmq_delete_tasks
+  integer, external :: zmq_abort
+  integer, external :: pt2_find_sample_lr
+
+  integer :: more, n, i, p, c, t, n_tasks, U
+  integer, allocatable :: task_id(:)
+  integer, allocatable :: index(:)
+  
+  double precision, external :: omp_get_wtime
+  double precision :: v, x, x2, x3, avg, avg2, avg3, eqt, E0, v0, n0
+  double precision :: time, time0
+  
+  integer, allocatable :: f(:)
+  logical, allocatable :: d(:) 
+  logical :: do_exit
+
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+
+  rss  = memory_of_int(pt2_n_tasks_max*2+N_det_generators*2)
+  rss += memory_of_double(N_states*N_det_generators)*3.d0 
+  rss += memory_of_double(N_states*pt2_n_tasks_max)*3.d0 
+  rss += memory_of_double(pt2_N_teeth+1)*4.d0 
+  call check_mem(rss,irp_here)
+
+  allocate(task_id(pt2_n_tasks_max), index(pt2_n_tasks_max), f(N_det_generators))
+  allocate(d(N_det_generators+1))
+  allocate(eI(N_states, N_det_generators), eI_task(N_states, pt2_n_tasks_max))
+  allocate(vI(N_states, N_det_generators), vI_task(N_states, pt2_n_tasks_max))
+  allocate(nI(N_states, N_det_generators), nI_task(N_states, pt2_n_tasks_max))
+  allocate(S(pt2_N_teeth+1), S2(pt2_N_teeth+1))
+  allocate(T2(pt2_N_teeth+1), T3(pt2_N_teeth+1))
+
+
+  
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  
+  pt2(:) = -huge(1.)
+  error(:) = huge(1.)
+  variance(:) = huge(1.)
+  norm(:) = 0.d0
+  S(:) = 0d0
+  S2(:) = 0d0
+  T2(:) = 0d0
+  T3(:) = 0d0
+  n = 1
+  t = 0
+  U = 0
+  eI(:,:) = 0d0
+  vI(:,:) = 0d0
+  nI(:,:) = 0d0
+  f(:) = pt2_F(:)
+  d(:) = .false.
+  n_tasks = 0
+  E0 = E
+  v0 = 0.d0
+  n0 = 0.d0
+  more = 1
+  time0 = omp_get_wtime()
+
+  do_exit = .false.
+  do while (n <= N_det_generators)
+    if(f(pt2_J(n)) == 0) then
+      d(pt2_J(n)) = .true.
+      do while(d(U+1))
+        U += 1
+      end do
+
+      ! Deterministic part
+      do while(t <= pt2_N_teeth)
+        if(U >= pt2_n_0(t+1)) then
+          t=t+1
+          E0 = 0.d0
+          v0 = 0.d0
+          n0 = 0.d0
+          do i=pt2_n_0(t),1,-1
+            E0 += eI(pt2_stoch_istate, i)
+            v0 += vI(pt2_stoch_istate, i)
+            n0 += nI(pt2_stoch_istate, i)
+          end do
+        else
+          exit
+        end if
+      end do
+
+      ! Add Stochastic part
+      c = pt2_R(n)
+      if(c > 0) then
+        x  = 0d0
+        x2 = 0d0
+        x3 = 0d0
+        do p=pt2_N_teeth, 1, -1
+          v = pt2_u_0 + pt2_W_T * (pt2_u(c) + dble(p-1))
+          i = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(p),pt2_n_0(p+1))
+          x  += eI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
+          x2 += vI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
+          x3 += nI(pt2_stoch_istate, i) * pt2_W_T / pt2_w(i)
+          S(p) += x
+          S2(p) += x*x
+          T2(p) += x2
+          T3(p) += x3
+        end do
+        avg  = E0 + S(t) / dble(c)
+        avg2 = v0 + T2(t) / dble(c)
+        avg3 = n0 + T3(t) / dble(c)
+        if ((avg /= 0.d0) .or. (n == N_det_generators) ) then 
+          do_exit = .true.
+        endif
+        pt2(pt2_stoch_istate) = avg
+        variance(pt2_stoch_istate) = avg2 
+        norm(pt2_stoch_istate) = avg3
+        ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
+        if(c > 2) then
+          eqt = dabs((S2(t) / c) - (S(t)/c)**2) ! dabs for numerical stability
+          eqt = sqrt(eqt / (dble(c) - 1.5d0))  
+          error(pt2_stoch_istate) = eqt
+          if(mod(c,10)==0 .or. n==N_det_generators) then
+            print '(G10.3, 2X, F16.10, 2X, G10.3, 2X, F14.10, 2X, F14.10, 2X, F10.4, A10)', c, avg+E, eqt, avg2, avg3, time-time0, ''
+            if(do_exit .and. (dabs(error(pt2_stoch_istate)) / (1.d-20 + dabs(pt2(pt2_stoch_istate)) ) <= relative_error)) then 
+              if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                call sleep(10)
+                if (zmq_abort(zmq_to_qp_run_socket) == -1) then
+                  print *, irp_here, ': Error in sending abort signal (2)'
+                endif
+              endif
+            endif
+          endif
+        endif
+        time = omp_get_wtime()
+      end if
+      n += 1
+    else if(more == 0) then
+      exit
+    else
+      call pull_pt2_results(zmq_socket_pull, index, eI_task, vI_task, nI_task, task_id, n_tasks)
+      if (zmq_delete_tasks(zmq_to_qp_run_socket,zmq_socket_pull,task_id,n_tasks,more) == -1) then
+          stop 'Unable to delete tasks'
+      endif
+      do i=1,n_tasks
+        eI(:, index(i)) += eI_task(:,i)
+        vI(:, index(i)) += vI_task(:,i)
+        nI(:, index(i)) += nI_task(:,i)
+        f(index(i)) -= 1
+      end do
+    end if
+  end do
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+end subroutine
+
+
+integer function pt2_find_sample(v, w)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, external :: pt2_find_sample_lr
+
+  pt2_find_sample = pt2_find_sample_lr(v, w, 0, N_det_generators)
+end function
+
+
+integer function pt2_find_sample_lr(v, w, l_in, r_in)
+  implicit none
+  double precision, intent(in) :: v, w(0:N_det_generators)
+  integer, intent(in) :: l_in,r_in
+  integer :: i,l,r
+
+  l=l_in
+  r=r_in
+
+  do while(r-l > 1)
+    i = shiftr(r+l,1)
+    if(w(i) < v) then
+      l = i
+    else
+      r = i
+    end if
+  end do
+  i = r
+  do r=i+1,N_det_generators
+    if (w(r) /= w(i)) then 
+      exit
+    endif
+  enddo
+  pt2_find_sample_lr = r-1
+end function
+
+
+BEGIN_PROVIDER [ integer, pt2_n_tasks ]
+ implicit none
+ BEGIN_DOC
+ ! Number of parallel tasks for the Monte Carlo
+ END_DOC
+ pt2_n_tasks = N_det_generators
+END_PROVIDER
+
+BEGIN_PROVIDER[ double precision, pt2_u, (N_det_generators)]
+  implicit none
+  integer, allocatable :: seed(:)
+  integer :: m,i
+  call random_seed(size=m)
+  allocate(seed(m))
+  do i=1,m
+    seed(i) = i
+  enddo
+  call random_seed(put=seed)
+  deallocate(seed)
+
+  call RANDOM_NUMBER(pt2_u)
+ END_PROVIDER
+
+ BEGIN_PROVIDER[ integer, pt2_J, (N_det_generators)]
+&BEGIN_PROVIDER[ integer, pt2_R, (N_det_generators)]
+  implicit none
+  integer                :: N_c, N_j
+  integer                :: U, t, i
+  double precision       :: v
+  integer, external      :: pt2_find_sample_lr
+  
+  logical, allocatable :: pt2_d(:)
+  integer :: m,l,r,k
+  integer :: ncache
+  integer, allocatable :: ii(:,:)
+  double precision :: dt
+
+  ncache = min(N_det_generators,10000)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(ncache)*dble(pt2_N_teeth) + memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+
+  allocate(ii(pt2_N_teeth,ncache),pt2_d(N_det_generators))
+
+  pt2_R(:) = 0
+  pt2_d(:) = .false.
+  N_c = 0
+  N_j = pt2_n_0(1)
+  do i=1,N_j
+      pt2_d(i) = .true.
+      pt2_J(i) = i
+  end do
+
+  U = 0
+  do while(N_j < pt2_n_tasks)
+
+    if (N_c+ncache > N_det_generators) then
+      ncache = N_det_generators - N_c
+    endif
+
+    !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(dt,v,t,k)
+    do k=1, ncache
+      dt = pt2_u_0 
+      do t=1, pt2_N_teeth
+        v = dt + pt2_W_T *pt2_u(N_c+k)
+        dt = dt + pt2_W_T 
+        ii(t,k) = pt2_find_sample_lr(v, pt2_cW,pt2_n_0(t),pt2_n_0(t+1))
+      end do
+    enddo
+    !$OMP END PARALLEL DO
+
+    do k=1,ncache
+      !ADD_COMB
+      N_c = N_c+1
+      do t=1, pt2_N_teeth
+        i = ii(t,k)
+        if(.not. pt2_d(i)) then
+          N_j += 1
+          pt2_J(N_j) = i
+          pt2_d(i) = .true.
+        end if
+      end do
+      
+      pt2_R(N_j) = N_c
+      
+      !FILL_TOOTH
+      do while(U < N_det_generators)
+        U += 1
+        if(.not. pt2_d(U)) then
+          N_j += 1
+          pt2_J(N_j) = U
+          pt2_d(U) = .true.
+          exit
+        end if
+      end do
+      if (N_j >= pt2_n_tasks) exit
+    end do
+  enddo
+
+  if(N_det_generators > 1) then
+    pt2_R(N_det_generators-1) = 0
+    pt2_R(N_det_generators) = N_c
+  end if
+
+  deallocate(ii,pt2_d)
+
+END_PROVIDER
+
+
+
+ BEGIN_PROVIDER [ double precision,     pt2_w, (N_det_generators) ] 
+&BEGIN_PROVIDER [ double precision,     pt2_cW, (0:N_det_generators) ] 
+&BEGIN_PROVIDER [ double precision,     pt2_W_T ]
+&BEGIN_PROVIDER [ double precision,     pt2_u_0 ]
+&BEGIN_PROVIDER [ integer,              pt2_n_0, (pt2_N_teeth+1) ]
+  implicit none
+  integer :: i, t
+  double precision, allocatable :: tilde_w(:), tilde_cW(:)
+  double precision :: r, tooth_width
+  integer, external :: pt2_find_sample
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_double(2*N_det_generators+1)
+  call check_mem(rss,irp_here)
+
+  allocate(tilde_w(N_det_generators), tilde_cW(0:N_det_generators))
+  
+  tilde_cW(0) = 0d0
+  
+  do i=1,N_det_generators
+    tilde_w(i)  = psi_coef_sorted_gen(i,pt2_stoch_istate)**2 !+ 1.d-20
+  enddo
+
+  double precision :: norm
+  norm = 0.d0
+  do i=N_det_generators,1,-1
+    norm += tilde_w(i) 
+  enddo
+
+  tilde_w(:) = tilde_w(:) / norm
+
+  tilde_cW(0) = -1.d0
+  do i=1,N_det_generators
+    tilde_cW(i) = tilde_cW(i-1) + tilde_w(i)
+  enddo
+  tilde_cW(:) = tilde_cW(:) + 1.d0
+  
+  pt2_n_0(1) = 0
+  do
+    pt2_u_0 = tilde_cW(pt2_n_0(1))
+    r = tilde_cW(pt2_n_0(1) + pt2_minDetInFirstTeeth)
+    pt2_W_T = (1d0 - pt2_u_0) / dble(pt2_N_teeth)
+    if(pt2_W_T >= r - pt2_u_0) then
+      exit
+    end if
+    pt2_n_0(1) += 1
+    if(N_det_generators - pt2_n_0(1) < pt2_minDetInFirstTeeth * pt2_N_teeth) then
+      stop "teeth building failed"
+    end if
+  end do
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    
+  do t=2, pt2_N_teeth
+    r = pt2_u_0 + pt2_W_T * dble(t-1)
+    pt2_n_0(t) = pt2_find_sample(r, tilde_cW)
+  end do
+  pt2_n_0(pt2_N_teeth+1) = N_det_generators
+    
+  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+  pt2_w(:pt2_n_0(1)) = tilde_w(:pt2_n_0(1))
+  do t=1, pt2_N_teeth
+    tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
+    if (tooth_width == 0.d0) then
+      tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1)))
+    endif
+    ASSERT(tooth_width > 0.d0)
+    do i=pt2_n_0(t)+1, pt2_n_0(t+1)
+      pt2_w(i) = tilde_w(i) * pt2_W_T / tooth_width
+    end do
+  end do
+  
+  pt2_cW(0) = 0d0
+  do i=1,N_det_generators
+    pt2_cW(i) = pt2_cW(i-1) + pt2_w(i)      
+  end do
+  pt2_n_0(pt2_N_teeth+1) = N_det_generators
+END_PROVIDER
+
+
+
+
+
diff --git a/src/fci/run_pt2_slave.irp.f b/src/fci/run_pt2_slave.irp.f
new file mode 100644
index 00000000..33ee418e
--- /dev/null
+++ b/src/fci/run_pt2_slave.irp.f
@@ -0,0 +1,247 @@
+
+subroutine run_pt2_slave(thread,iproc,energy)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states_diag)
+  integer,  intent(in)            :: thread, iproc
+  integer                         :: rc, i
+
+  integer                        :: worker_id, ctask, ltask
+  character*(512), allocatable   :: task(:)
+  integer, allocatable           :: task_id(:)
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: buf
+  logical :: done
+
+  double precision,allocatable :: pt2(:,:), variance(:,:), norm(:,:)
+  integer :: n_tasks, k
+  integer, allocatable :: i_generator(:), subset(:)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss  = memory_of_int(pt2_n_tasks_max)*67.d0 
+  rss += memory_of_double(pt2_n_tasks_max)*(N_states*3)
+  call check_mem(rss,irp_here)
+
+  allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max))
+  allocate(pt2(N_states,pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max))
+  allocate(variance(N_states,pt2_n_tasks_max))
+  allocate(norm(N_states,pt2_n_tasks_max))
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+    return
+  endif
+           
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  buf%N = 0
+  n_tasks = 1
+  call create_selection_buffer(0, 0, buf)
+
+  done = .False.
+  n_tasks = 1
+  do while (.not.done)
+
+    n_tasks = max(1,n_tasks)
+    n_tasks = min(pt2_n_tasks_max,n_tasks)
+
+    integer, external :: get_tasks_from_taskserver
+    if (get_tasks_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id, task, n_tasks) == -1) then
+      exit
+    endif
+    done = task_id(n_tasks) == 0
+    if (done) n_tasks = n_tasks-1
+    if (n_tasks == 0) exit
+
+    do k=1,n_tasks
+      read (task(k),*) subset(k), i_generator(k)
+    enddo
+
+    double precision :: time0, time1
+    call wall_time(time0)
+    do k=1,n_tasks
+        pt2(:,k) = 0.d0
+        variance(:,k) = 0.d0
+        norm(:,k) = 0.d0
+        buf%cur = 0
+!double precision :: time2
+!call wall_time(time2)
+        call select_connected(i_generator(k),energy,pt2(1,k),variance(1,k),norm(1,k),buf,subset(k),pt2_F(i_generator(k)))
+!call wall_time(time1)
+!print *,  i_generator(1), time1-time2, n_tasks, pt2_F(i_generator(1))
+    enddo
+    call wall_time(time1)
+!print *,  i_generator(1), time1-time0, n_tasks
+
+    integer, external :: tasks_done_to_taskserver
+    if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then
+      done = .true.
+    endif
+    call push_pt2_results(zmq_socket_push, i_generator, pt2, variance, norm, task_id, n_tasks)
+
+    ! Try to adjust n_tasks around nproc/8 seconds per job
+    n_tasks = min(2*n_tasks,int( dble(n_tasks * nproc/8) / (time1 - time0 + 1.d0)))
+  end do
+
+  integer, external :: disconnect_from_taskserver
+  do i=1,300
+    if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) /= -2) exit
+    call sleep(1)
+    print *,  'Retry disconnect...'
+  end do
+
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call delete_selection_buffer(buf)
+end subroutine
+
+
+subroutine push_pt2_results(zmq_socket_push, index, pt2, variance, norm, task_id, n_tasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  double precision, intent(in)   :: pt2(N_states,n_tasks)
+  double precision, intent(in)   :: variance(N_states,n_tasks)
+  double precision, intent(in)   :: norm(N_states,n_tasks)
+  integer, intent(in) :: n_tasks, index(n_tasks), task_id(n_tasks)
+  integer :: rc
+
+  rc = f77_zmq_send( zmq_socket_push, n_tasks, 4, ZMQ_SNDMORE)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 4) stop 'push'
+
+
+  rc = f77_zmq_send( zmq_socket_push, index, 4*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 4*n_tasks) stop 'push'
+
+
+  rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'push'
+
+  rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'push'
+
+  rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states*n_tasks, ZMQ_SNDMORE)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'push'
+
+  rc = f77_zmq_send( zmq_socket_push, task_id, n_tasks*4, 0)
+  if (rc == -1) then
+    return
+  endif
+  if(rc /= 4*n_tasks) stop 'push'
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if (rc == -1) then
+    return
+  endif
+  if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine pull_pt2_results(zmq_socket_pull, index, pt2, variance, norm, task_id, n_tasks)
+  use f77_zmq
+  use selection_types
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  double precision, intent(inout) :: pt2(N_states,*)
+  double precision, intent(inout) :: variance(N_states,*)
+  double precision, intent(inout) :: norm(N_states,*)
+  integer, intent(out) :: index(*)
+  integer, intent(out) :: n_tasks, task_id(*)
+  integer :: rc, rn, i
+
+  rc = f77_zmq_recv( zmq_socket_pull, n_tasks, 4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 4) stop 'pull'
+
+  rc = f77_zmq_recv( zmq_socket_pull, index, 4*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 4*n_tasks) stop 'pull'
+
+  rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'pull'
+
+  rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'pull'
+
+  rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8*n_tasks, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 8*N_states*n_tasks) stop 'pull'
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id, n_tasks*4, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if(rc /= 4*n_tasks) stop 'pull'
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc == -1) then
+    n_tasks = 1
+    task_id(1) = 0
+  endif
+  if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+ 
diff --git a/src/fci/run_selection_slave.irp.f b/src/fci/run_selection_slave.irp.f
new file mode 100644
index 00000000..bd171fbd
--- /dev/null
+++ b/src/fci/run_selection_slave.irp.f
@@ -0,0 +1,254 @@
+subroutine run_selection_slave(thread,iproc,energy)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  double precision, intent(in)    :: energy(N_states)
+  integer,  intent(in)            :: thread, iproc
+  integer                        :: rc, i
+
+  integer                        :: worker_id, task_id(1), ctask, ltask
+  character*(512)                :: task
+
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_push_socket
+  integer(ZMQ_PTR)               :: zmq_socket_push
+
+  type(selection_buffer) :: buf, buf2
+  logical :: done, buffer_ready
+  double precision :: pt2(N_states)
+  double precision :: variance(N_states)
+  double precision :: norm(N_states)
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order N_int pt2_F
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+
+  integer, external :: connect_to_taskserver 
+  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then 
+    call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket) 
+    return
+  endif
+
+  zmq_socket_push      = new_zmq_push_socket(thread)
+
+  buf%N = 0
+  buffer_ready = .False.
+  ctask = 1
+  pt2(:) = 0d0
+  variance(:) = 0d0
+  norm(:) = 0.d0
+
+  do
+    integer, external :: get_task_from_taskserver
+    if (get_task_from_taskserver(zmq_to_qp_run_socket,worker_id, task_id(ctask), task) == -1) then
+      exit
+    endif
+    done = task_id(ctask) == 0
+    if (done) then
+      ctask = ctask - 1
+    else
+      integer :: i_generator, N, subset
+      read(task,*) subset, i_generator, N
+      if(buf%N == 0) then
+        ! Only first time 
+        call create_selection_buffer(N, N*2, buf)
+        call create_selection_buffer(N, N*2, buf2)
+        buffer_ready = .True.
+      else
+        ASSERT (N == buf%N)
+      end if
+      call select_connected(i_generator,energy,pt2,variance,norm,buf,subset,pt2_F(i_generator))
+    endif
+
+    integer, external :: task_done_to_taskserver
+
+    if(done .or. ctask == size(task_id)) then
+      do i=1, ctask
+         if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+           call sleep(1)
+          if (task_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id(i)) == -1) then
+            ctask = 0
+            done = .true.
+            exit
+          endif
+         endif
+      end do
+      if(ctask > 0) then
+        call sort_selection_buffer(buf)
+        call merge_selection_buffers(buf,buf2)
+        call push_selection_results(zmq_socket_push, pt2, variance, norm, buf, task_id(1), ctask)
+        buf%mini = buf2%mini
+        pt2(:) = 0d0
+        variance(:) = 0d0
+        norm(:) = 0d0
+        buf%cur = 0
+      end if
+      ctask = 0
+    end if
+
+    if(done) exit
+    ctask = ctask + 1
+  end do
+
+
+  integer, external :: disconnect_from_taskserver 
+  if (disconnect_from_taskserver(zmq_to_qp_run_socket,worker_id) == -1) then 
+    continue 
+  endif 
+
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+  call end_zmq_push_socket(zmq_socket_push,thread)
+  if (buffer_ready) then
+    call delete_selection_buffer(buf)
+    call delete_selection_buffer(buf2)
+  endif
+end subroutine
+
+
+subroutine push_selection_results(zmq_socket_push, pt2, variance, norm, b, task_id, ntask)
+  use f77_zmq
+  use selection_types
+  implicit none
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_push
+  double precision, intent(in)   :: pt2(N_states)
+  double precision, intent(in)   :: variance(N_states)
+  double precision, intent(in)   :: norm(N_states)
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in) :: ntask, task_id(*)
+  integer :: rc
+
+  rc = f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, b%cur, 4, ZMQ_SNDMORE)'
+  endif
+
+  if (b%cur > 0) then
+
+      rc = f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_send( zmq_socket_push, pt2, 8*N_states, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_send( zmq_socket_push, variance, 8*N_states, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_send( zmq_socket_push, norm, 8*N_states, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)
+      if(rc /= 8*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%val(1), 8*b%cur, ZMQ_SNDMORE)'
+      endif
+
+      rc = f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)
+      if(rc /= bit_kind*N_int*2*b%cur) then
+        print *,  'f77_zmq_send( zmq_socket_push, b%det(1,1,1), bit_kind*N_int*2*b%cur, ZMQ_SNDMORE)'
+      endif
+
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)
+  if(rc /= 4) then
+    print *,  'f77_zmq_send( zmq_socket_push, ntask, 4, ZMQ_SNDMORE)'
+  endif
+
+  rc = f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)
+  if(rc /= 4*ntask) then
+    print *,  'f77_zmq_send( zmq_socket_push, task_id(1), ntask*4, 0)'
+  endif
+
+! Activate is zmq_socket_push is a REQ
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  character*(2) :: ok
+  rc = f77_zmq_recv( zmq_socket_push, ok, 2, 0)
+  if ((rc /= 2).and.(ok(1:2) /= 'ok')) then
+    print *,  irp_here//': error in receiving ok'
+    stop -1
+  endif
+IRP_ENDIF
+
+end subroutine
+
+
+subroutine pull_selection_results(zmq_socket_pull, pt2, variance, norm, val, det, N, task_id, ntask)
+  use f77_zmq
+  use selection_types
+  implicit none
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  double precision, intent(inout) :: pt2(N_states)
+  double precision, intent(inout) :: variance(N_states)
+  double precision, intent(inout) :: norm(N_states)
+  double precision, intent(out) :: val(*)
+  integer(bit_kind), intent(out) :: det(N_int, 2, *)
+  integer, intent(out) :: N, ntask, task_id(*)
+  integer :: rc, rn, i
+
+  rc = f77_zmq_recv( zmq_socket_pull, N, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, N, 4, 0)'
+  endif
+
+  if (N>0) then
+      rc = f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, pt2, N_states*8, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, variance, N_states*8, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)
+      if(rc /= 8*N_states) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, norm, N_states*8, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)
+      if(rc /= 8*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, val(1), 8*N, 0)'
+      endif
+
+      rc = f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)
+      if(rc /= bit_kind*N_int*2*N) then
+        print *,  'f77_zmq_recv( zmq_socket_pull, det(1,1,1), bit_kind*N_int*2*N, 0)'
+      endif
+  else
+      pt2(:) = 0.d0
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)
+  if(rc /= 4) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, ntask, 4, 0)'
+  endif
+
+  rc = f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)
+  if(rc /= 4*ntask) then
+    print *,  'f77_zmq_recv( zmq_socket_pull, task_id(1), ntask*4, 0)'
+  endif
+
+! Activate is zmq_socket_pull is a REP
+IRP_IF ZMQ_PUSH
+IRP_ELSE
+  rc = f77_zmq_send( zmq_socket_pull, 'ok', 2, 0)
+  if (rc /= 2) then
+    print *,  irp_here//': error in sending ok'
+    stop -1
+  endif
+IRP_ENDIF
+end subroutine
+ 
+ 
+
diff --git a/src/fci/selection.irp.f b/src/fci/selection.irp.f
new file mode 100644
index 00000000..4b3a989c
--- /dev/null
+++ b/src/fci/selection.irp.f
@@ -0,0 +1,1371 @@
+use bitmasks
+
+
+subroutine get_mask_phase(det1, pm, Nint)
+  use bitmasks
+  implicit none
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: det1(Nint,2)
+  integer(bit_kind), intent(out) :: pm(Nint,2)
+  integer(bit_kind) :: tmp
+ integer :: ispin, i
+  do ispin=1,2
+  tmp = 0_8
+  do i=1,Nint
+    pm(i,ispin) = ieor(det1(i,ispin), shiftl(det1(i,ispin), 1))
+    pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 2))
+    pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 4))
+    pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 8))
+    pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 16))
+    pm(i,ispin) = ieor(pm(i,ispin), shiftl(pm(i,ispin), 32))
+    pm(i,ispin) = ieor(pm(i,ispin), tmp)
+    if(iand(popcnt(det1(i,ispin)), 1) == 1) tmp = not(tmp)
+  end do
+  end do
+
+end subroutine
+
+
+subroutine select_connected(i_generator,E0,pt2,variance,norm,b,subset,csubset)
+  use bitmasks
+  use selection_types
+  implicit none
+  integer, intent(in)            :: i_generator, subset, csubset
+  type(selection_buffer), intent(inout) :: b
+  double precision, intent(inout)  :: pt2(N_states)
+  double precision, intent(inout)  :: variance(N_states)
+  double precision, intent(inout)  :: norm(N_states)
+  integer :: k,l
+  double precision, intent(in)   :: E0(N_states)
+
+  integer(bit_kind)              :: hole_mask(N_int,2), particle_mask(N_int,2)
+
+  double precision, allocatable  :: fock_diag_tmp(:,:)
+
+  allocate(fock_diag_tmp(2,mo_tot_num+1))
+  
+  call build_fock_tmp(fock_diag_tmp,psi_det_generators(1,1,i_generator),N_int)
+
+  do l=1,N_generators_bitmask
+    do k=1,N_int
+      hole_mask(k,1) = iand(generators_bitmask(k,1,s_hole,l), psi_det_generators(k,1,i_generator))
+      hole_mask(k,2) = iand(generators_bitmask(k,2,s_hole,l), psi_det_generators(k,2,i_generator))
+      particle_mask(k,1) = iand(generators_bitmask(k,1,s_part,l), not(psi_det_generators(k,1,i_generator)) )
+      particle_mask(k,2) = iand(generators_bitmask(k,2,s_part,l), not(psi_det_generators(k,2,i_generator)) )
+    enddo
+    call select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,b,subset,csubset)
+  enddo
+  deallocate(fock_diag_tmp)
+end subroutine
+
+
+double precision function get_phase_bi(phasemask, s1, s2, h1, p1, h2, p2, Nint)
+  use bitmasks
+  implicit none
+
+  integer, intent(in) :: Nint
+  integer(bit_kind), intent(in) :: phasemask(Nint,2)
+  integer, intent(in) :: s1, s2, h1, h2, p1, p2
+  logical :: change
+  integer :: np
+  double precision, save :: res(0:1) = (/1d0, -1d0/)
+
+  integer :: h1_int, h2_int
+  integer :: p1_int, p2_int
+  integer :: h1_bit, h2_bit
+  integer :: p1_bit, p2_bit
+  h1_int = shiftr(h1-1,bit_kind_shift)+1
+  h1_bit = h1 - shiftl(h1_int-1,bit_kind_shift)-1
+
+  h2_int = shiftr(h2-1,bit_kind_shift)+1
+  h2_bit = h2 - shiftl(h2_int-1,bit_kind_shift)-1
+
+  p1_int = shiftr(p1-1,bit_kind_shift)+1
+  p1_bit = p1 - shiftl(p1_int-1,bit_kind_shift)-1
+
+  p2_int = shiftr(p2-1,bit_kind_shift)+1
+  p2_bit = p2 - shiftl(p2_int-1,bit_kind_shift)-1
+
+
+  ! Put the phasemask bits at position 0, and add them all
+  h1_bit = int(shiftr(phasemask(h1_int,s1),h1_bit))
+  p1_bit = int(shiftr(phasemask(p1_int,s1),p1_bit))
+  h2_bit = int(shiftr(phasemask(h2_int,s2),h2_bit))
+  p2_bit = int(shiftr(phasemask(p2_int,s2),p2_bit))
+
+  np = h1_bit + p1_bit + h2_bit + p2_bit
+
+  if(p1 < h1) np = np + 1
+  if(p2 < h2) np = np + 1
+  
+  if(s1 == s2 .and. max(h1, p1) > min(h2, p2)) np = np + 1
+  get_phase_bi = res(iand(np,1))
+end 
+
+
+
+subroutine get_m2(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+  
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_tot_num)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: vect(N_states, mo_tot_num)
+  integer, intent(in) :: sp, h(0:2, 2), p(0:3, 2)
+  integer :: i, j, k, h1, h2, p1, p2, sfix, hfix, pfix, hmob, pmob, puti
+  double precision :: hij
+  double precision, external :: get_phase_bi, mo_bielec_integral
+  
+  integer, parameter :: turn3_2(2,3) = reshape((/2,3, 1,3, 1,2/), (/2,3/))
+  integer, parameter :: turn2(2) = (/2,1/) 
+  
+  if(h(0,sp) == 2) then
+    h1 = h(1, sp)
+    h2 = h(2, sp)
+    do i=1,3
+      puti = p(i, sp)
+      if(bannedOrb(puti)) cycle
+      p1 = p(turn3_2(1,i), sp)
+      p2 = p(turn3_2(2,i), sp)
+      hij = mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2, p1, h1, h2)
+      hij = hij * get_phase_bi(phasemask, sp, sp, h1, p1, h2, p2, N_int)
+      do k=1,N_states
+        vect(k,puti) = vect(k,puti) + hij * coefs(k)
+      enddo
+    end do
+  else if(h(0,sp) == 1) then
+    sfix = turn2(sp)
+    hfix = h(1,sfix)
+    pfix = p(1,sfix)
+    hmob = h(1,sp)
+    do j=1,2
+      puti = p(j, sp)
+      if(bannedOrb(puti)) cycle
+      pmob = p(turn2(j), sp)
+      hij = mo_bielec_integral(pmob, pfix, hmob, hfix)
+      hij = hij * get_phase_bi(phasemask, sp, sfix, hmob, pmob, hfix, pfix, N_int)
+      do k=1,N_states
+        vect(k,puti) = vect(k,puti) + hij * coefs(k)
+      enddo
+    end do
+  else
+    puti = p(1,sp)
+    if(.not. bannedOrb(puti)) then
+      sfix = turn2(sp)
+      p1 = p(1,sfix)
+      p2 = p(2,sfix)
+      h1 = h(1,sfix)
+      h2 = h(2,sfix)
+      hij = (mo_bielec_integral(p1,p2,h1,h2) - mo_bielec_integral(p2,p1,h1,h2))
+      hij = hij * get_phase_bi(phasemask, sfix, sfix, h1, p1, h2, p2, N_int)
+      do k=1,N_states
+        vect(k,puti) = vect(k,puti) + hij * coefs(k)
+      enddo
+    end if
+  end if
+end 
+
+
+
+subroutine get_m1(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+  
+  integer(bit_kind), intent(in)  :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_tot_num)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: vect(N_states, mo_tot_num)
+  integer, intent(in)            :: sp, h(0:2, 2), p(0:3, 2)
+  integer                        :: i, hole, p1, p2, sh, k
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision               :: hij
+  double precision, external     :: get_phase_bi, mo_bielec_integral
+  
+  allocate (lbanned(mo_tot_num))
+  lbanned = bannedOrb
+  sh = 1
+  if(h(0,2) == 1) sh = 2
+  hole = h(1, sh)
+  lbanned(p(1,sp)) = .true.
+  if(p(0,sp) == 2) lbanned(p(2,sp)) = .true.
+  !print *, "SPm1", sp, sh
+  
+  p1 = p(1, sp)
+  
+  if(sp == sh) then
+    p2 = p(2, sp)
+    lbanned(p2) = .true.
+    
+
+    double precision :: hij_cache(mo_tot_num,2)
+    call get_mo_bielec_integrals(hole,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+    call get_mo_bielec_integrals(hole,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+
+    do i=1,hole-1
+      if(lbanned(i)) cycle
+      hij = hij_cache(i,1)-hij_cache(i,2)
+      if (hij /= 0.d0) then
+        hij = hij * get_phase_bi(phasemask, sp, sp, i, p1, hole, p2, N_int)
+        do k=1,N_states
+          vect(k,i) = vect(k,i) + hij * coefs(k)
+        enddo
+      endif
+    end do
+    do i=hole+1,mo_tot_num
+      if(lbanned(i)) cycle
+      hij = hij_cache(i,2)-hij_cache(i,1)
+      if (hij /= 0.d0) then
+        hij = hij * get_phase_bi(phasemask, sp, sp, hole, p1, i, p2, N_int)
+        do k=1,N_states
+          vect(k,i) = vect(k,i) + hij * coefs(k)
+        enddo
+      endif
+    end do
+
+    call apply_particle(mask, sp, p2, det, ok,  N_int)
+    call i_h_j(gen, det, N_int, hij)
+    do k=1,N_states
+      vect(k,p2) = vect(k,p2) + hij * coefs(k)
+    enddo
+  else
+    p2 = p(1, sh)
+    call get_mo_bielec_integrals(hole,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+    do i=1,mo_tot_num
+      if(lbanned(i)) cycle
+      hij = hij_cache(i,1)
+      if (hij /= 0.d0) then
+        hij = hij * get_phase_bi(phasemask, sp, sh, i, p1, hole, p2, N_int)
+        do k=1,N_states
+          vect(k,i) = vect(k,i) + hij * coefs(k)
+        enddo
+      endif
+    end do
+  end if
+  deallocate(lbanned)
+
+  call apply_particle(mask, sp, p1, det, ok,  N_int)
+  call i_h_j(gen, det, N_int, hij)
+  do k=1,N_states
+    vect(k,p1) = vect(k,p1) + hij * coefs(k)
+  enddo
+end 
+
+
+subroutine get_m0(gen, phasemask, bannedOrb, vect, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+  
+  integer(bit_kind), intent(in)  :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_tot_num)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: vect(N_states, mo_tot_num)
+  integer, intent(in)            :: sp, h(0:2, 2), p(0:3, 2)
+  integer                        :: i,k
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision               :: hij
+  
+  allocate(lbanned(mo_tot_num))
+  lbanned = bannedOrb
+  lbanned(p(1,sp)) = .true.
+  do i=1,mo_tot_num
+    if(lbanned(i)) cycle
+    call apply_particle(mask, sp, i, det, ok, N_int)
+    call i_h_j(gen, det, N_int, hij)
+    do k=1,N_states
+      vect(k,i) = vect(k,i) + hij * coefs(k)
+    enddo
+  end do
+  deallocate(lbanned)
+end 
+
+
+subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_diag_tmp,E0,pt2,variance,norm,buf,subset,csubset)
+  use bitmasks
+  use selection_types
+  implicit none
+  BEGIN_DOC
+!            WARNING /!\ : It is assumed that the generators and selectors are psi_det_sorted
+  END_DOC
+  
+  integer, intent(in)            :: i_generator, subset, csubset
+  integer(bit_kind), intent(in)  :: hole_mask(N_int,2), particle_mask(N_int,2)
+  double precision, intent(in)   :: fock_diag_tmp(mo_tot_num)
+  double precision, intent(in)   :: E0(N_states)
+  double precision, intent(inout) :: pt2(N_states)
+  double precision, intent(inout)  :: variance(N_states)
+  double precision, intent(inout)  :: norm(N_states)
+  type(selection_buffer), intent(inout) :: buf
+  
+  integer                         :: h1,h2,s1,s2,s3,i1,i2,ib,sp,k,i,j,nt,ii
+  integer(bit_kind)               :: hole(N_int,2), particle(N_int,2), mask(N_int, 2), pmask(N_int, 2)
+  logical                         :: fullMatch, ok
+  
+  integer(bit_kind) :: mobMask(N_int, 2), negMask(N_int, 2)
+  integer,allocatable               :: preinteresting(:), prefullinteresting(:), interesting(:), fullinteresting(:)
+  integer(bit_kind), allocatable :: minilist(:, :, :), fullminilist(:, :, :)
+  logical, allocatable           :: banned(:,:,:), bannedOrb(:,:)
+
+  double precision, allocatable   :: mat(:,:,:)
+  
+  logical :: monoAdo, monoBdo
+  integer :: maskInd
+
+  integer(bit_kind), allocatable:: preinteresting_det(:,:,:)
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int( (8*N_int+5)*N_det + N_det_alpha_unique + 4*N_int*N_det_selectors)
+  rss += memory_of_double(mo_tot_num*mo_tot_num*(N_states+1))
+  call check_mem(rss,irp_here)
+  allocate (preinteresting_det(N_int,2,N_det))
+
+  monoAdo = .true.
+  monoBdo = .true.
+
+  
+  do k=1,N_int
+    hole    (k,1) = iand(psi_det_generators(k,1,i_generator), hole_mask(k,1))
+    hole    (k,2) = iand(psi_det_generators(k,2,i_generator), hole_mask(k,2))
+    particle(k,1) = iand(not(psi_det_generators(k,1,i_generator)), particle_mask(k,1))
+    particle(k,2) = iand(not(psi_det_generators(k,2,i_generator)), particle_mask(k,2))
+  enddo
+  
+  
+  integer                        :: N_holes(2), N_particles(2)
+  integer                        :: hole_list(N_int*bit_kind_size,2)
+  integer                        :: particle_list(N_int*bit_kind_size,2)
+
+  call bitstring_to_list_ab(hole    , hole_list    , N_holes    , N_int)
+  call bitstring_to_list_ab(particle, particle_list, N_particles, N_int)
+
+  integer :: l_a, nmax, idx
+  integer, allocatable :: indices(:), exc_degree(:), iorder(:)
+  allocate (indices(N_det),  &
+            exc_degree(max(N_det_alpha_unique,N_det_beta_unique)))
+
+  PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+  PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
+  PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+  PROVIDE psi_bilinear_matrix_transp_order
+
+  k=1
+  do i=1,N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,i), &
+      psi_det_generators(1,1,i_generator), exc_degree(i), N_int)
+  enddo
+
+  do j=1,N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,j), &
+      psi_det_generators(1,2,i_generator), nt, N_int)
+    if (nt > 2) cycle
+    do l_a=psi_bilinear_matrix_columns_loc(j), psi_bilinear_matrix_columns_loc(j+1)-1
+      i = psi_bilinear_matrix_rows(l_a)
+      if (nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
+        if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
+        indices(k) = idx
+        k=k+1
+      endif
+    enddo
+  enddo
+  
+  do i=1,N_det_beta_unique
+    call get_excitation_degree_spin(psi_det_beta_unique(1,i), &
+      psi_det_generators(1,2,i_generator), exc_degree(i), N_int)
+  enddo
+
+  do j=1,N_det_alpha_unique
+    call get_excitation_degree_spin(psi_det_alpha_unique(1,j), &
+      psi_det_generators(1,1,i_generator), nt, N_int)
+    if (nt > 1) cycle
+    do l_a=psi_bilinear_matrix_transp_rows_loc(j), psi_bilinear_matrix_transp_rows_loc(j+1)-1
+      i = psi_bilinear_matrix_transp_columns(l_a)
+      if (exc_degree(i) < 3) cycle
+      if (nt + exc_degree(i) <= 4) then
+        idx = psi_det_sorted_order(                   &
+                 psi_bilinear_matrix_order(           &
+                 psi_bilinear_matrix_transp_order(l_a)))
+        if (psi_average_norm_contrib_sorted(idx) < 1.d-12) cycle
+        indices(k) = idx
+        k=k+1
+      endif
+    enddo
+  enddo
+
+  deallocate(exc_degree)
+  nmax=k-1
+
+  allocate(iorder(nmax))
+  do i=1,nmax
+    iorder(i) = i
+  enddo
+  call isort(indices,iorder,nmax)
+  deallocate(iorder)
+
+  allocate(preinteresting(0:N_det), prefullinteresting(0:N_det), &
+            interesting(0:N_det), fullinteresting(0:N_det))
+  preinteresting(0) = 0
+  prefullinteresting(0) = 0
+  
+  do i=1,N_int
+    negMask(i,1) = not(psi_det_generators(i,1,i_generator))
+    negMask(i,2) = not(psi_det_generators(i,2,i_generator))
+  end do
+  
+  do k=1,nmax
+    i = indices(k)
+    mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) 
+    do j=2,N_int
+      mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2)) 
+    end do
+
+    if(nt <= 4) then
+      if(i <= N_det_selectors) then
+        preinteresting(0) = preinteresting(0) + 1
+        preinteresting(preinteresting(0)) = i
+        do j=1,N_int
+          preinteresting_det(j,1,preinteresting(0)) = psi_det_sorted(j,1,i)
+          preinteresting_det(j,2,preinteresting(0)) = psi_det_sorted(j,2,i)
+        enddo
+      else if(nt <= 2) then
+        prefullinteresting(0) = prefullinteresting(0) + 1
+        prefullinteresting(prefullinteresting(0)) = i
+      end if
+    end if
+  end do
+  deallocate(indices)
+  
+  allocate(minilist(N_int, 2, N_det_selectors), fullminilist(N_int, 2, N_det))
+  allocate(banned(mo_tot_num, mo_tot_num,2), bannedOrb(mo_tot_num, 2))
+  allocate (mat(N_states, mo_tot_num, mo_tot_num))
+  maskInd = -1
+
+  integer :: nb_count, maskInd_save
+  logical :: monoBdo_save
+  logical :: found
+  do s1=1,2
+    do i1=N_holes(s1),1,-1   ! Generate low excitations first
+
+      found = .False.
+      monoBdo_save = monoBdo
+      maskInd_save = maskInd
+      do s2=s1,2
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        do i2=N_holes(s2),ib,-1
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then  
+            found = .True.
+          end if
+        enddo
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+
+      if (.not.found) cycle
+      monoBdo = monoBdo_save
+      maskInd = maskInd_save
+
+      h1 = hole_list(i1,s1)
+      call apply_hole(psi_det_generators(1,1,i_generator), s1,h1, pmask, ok, N_int)
+      
+      negMask = not(pmask)
+      
+      interesting(0) = 0
+      fullinteresting(0) = 0
+      
+      do ii=1,preinteresting(0)
+        select case (N_int)
+          case (1)
+            mobMask(1,1) = iand(negMask(1,1), preinteresting_det(1,1,ii))
+            mobMask(1,2) = iand(negMask(1,2), preinteresting_det(1,2,ii))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+          case (2)
+            mobMask(1:2,1) = iand(negMask(1:2,1), preinteresting_det(1:2,1,ii))
+            mobMask(1:2,2) = iand(negMask(1:2,2), preinteresting_det(1:2,2,ii))
+            nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2)) + &
+                 popcnt(mobMask(2, 1)) + popcnt(mobMask(2, 2)) 
+          case (3)
+            mobMask(1:3,1) = iand(negMask(1:3,1), preinteresting_det(1:3,1,ii))
+            mobMask(1:3,2) = iand(negMask(1:3,2), preinteresting_det(1:3,2,ii))
+            nt = 0
+            do j=3,1,-1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            end do
+          case (4)
+            mobMask(1:4,1) = iand(negMask(1:4,1), preinteresting_det(1:4,1,ii))
+            mobMask(1:4,2) = iand(negMask(1:4,2), preinteresting_det(1:4,2,ii))
+            nt = 0
+            do j=4,1,-1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            end do
+          case default
+            mobMask(1:N_int,1) = iand(negMask(1:N_int,1), preinteresting_det(1:N_int,1,ii))
+            mobMask(1:N_int,2) = iand(negMask(1:N_int,2), preinteresting_det(1:N_int,2,ii))
+            nt = 0 
+            do j=N_int,1,-1
+              if (mobMask(j,1) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 1))
+                if (nt > 4) exit
+              endif
+              if (mobMask(j,2) /= 0_bit_kind) then
+                nt = nt+ popcnt(mobMask(j, 2))
+                if (nt > 4) exit
+              endif
+            end do
+        end select
+        
+        if(nt <= 4) then
+          i = preinteresting(ii)
+          interesting(0) = interesting(0) + 1
+          interesting(interesting(0)) = i
+          minilist(1,1,interesting(0)) = preinteresting_det(1,1,ii)
+          minilist(1,2,interesting(0)) = preinteresting_det(1,2,ii)
+          do j=2,N_int
+            minilist(j,1,interesting(0)) = preinteresting_det(j,1,ii)
+            minilist(j,2,interesting(0)) = preinteresting_det(j,2,ii)
+          enddo
+          if(nt <= 2) then
+            fullinteresting(0) = fullinteresting(0) + 1
+            fullinteresting(fullinteresting(0)) = i
+            fullminilist(1,1,fullinteresting(0)) = preinteresting_det(1,1,ii)
+            fullminilist(1,2,fullinteresting(0)) = preinteresting_det(1,2,ii)
+            do j=2,N_int
+              fullminilist(j,1,fullinteresting(0)) = preinteresting_det(j,1,ii)
+              fullminilist(j,2,fullinteresting(0)) = preinteresting_det(j,2,ii)
+            enddo
+          end if
+        end if
+        
+      end do
+      
+      do ii=1,prefullinteresting(0)
+        i = prefullinteresting(ii)
+        nt = 0
+        mobMask(1,1) = iand(negMask(1,1), psi_det_sorted(1,1,i))
+        mobMask(1,2) = iand(negMask(1,2), psi_det_sorted(1,2,i))
+        nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+        if (nt > 2) cycle
+        do j=N_int,2,-1
+          mobMask(j,1) = iand(negMask(j,1), psi_det_sorted(j,1,i))
+          mobMask(j,2) = iand(negMask(j,2), psi_det_sorted(j,2,i))
+          nt = nt+ popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+          if (nt > 2) exit
+        end do
+        
+        if(nt <= 2) then
+          fullinteresting(0) = fullinteresting(0) + 1
+          fullinteresting(fullinteresting(0)) = i
+          fullminilist(1,1,fullinteresting(0)) = psi_det_sorted(1,1,i)
+          fullminilist(1,2,fullinteresting(0)) = psi_det_sorted(1,2,i)
+          do j=2,N_int
+            fullminilist(j,1,fullinteresting(0)) = psi_det_sorted(j,1,i)
+            fullminilist(j,2,fullinteresting(0)) = psi_det_sorted(j,2,i)
+          enddo
+        end if
+      end do
+
+      do s2=s1,2
+        sp = s1
+        
+        if(s1 /= s2) sp = 3
+        
+        ib = 1
+        if(s1 == s2) ib = i1+1
+        monoAdo = .true.
+        do i2=N_holes(s2),ib,-1   ! Generate low excitations first
+          
+          h2 = hole_list(i2,s2)
+          call apply_hole(pmask, s2,h2, mask, ok, N_int)
+          banned = .false.
+          do j=1,mo_tot_num
+            bannedOrb(j, 1) = .true.
+            bannedOrb(j, 2) = .true.
+          enddo
+          do s3=1,2
+            do i=1,N_particles(s3)
+              bannedOrb(particle_list(i,s3), s3) = .false.
+            enddo
+          enddo
+          if(s1 /= s2) then
+            if(monoBdo) then
+              bannedOrb(h1,s1) = .false.
+            end if
+            if(monoAdo) then
+              bannedOrb(h2,s2) = .false.
+              monoAdo = .false.
+            end if
+          end if
+
+          maskInd = maskInd + 1
+          if(mod(maskInd, csubset) == (subset-1)) then  
+            
+            call spot_isinwf(mask, fullminilist, i_generator, fullinteresting(0), banned, fullMatch, fullinteresting)
+            if(fullMatch) cycle
+          
+            call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)
+
+            call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf)
+          end if
+        enddo
+        if(s1 /= s2) monoBdo = .false.
+      enddo
+    enddo
+  enddo
+  deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
+  deallocate(minilist, fullminilist, banned, bannedOrb,mat)
+end subroutine
+
+
+
+subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2, variance, norm, mat, buf)
+  use bitmasks
+  use selection_types
+  implicit none
+  
+  integer, intent(in) :: i_generator, sp, h1, h2
+  double precision, intent(in) :: mat(N_states, mo_tot_num, mo_tot_num)
+  logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num)
+  double precision, intent(in)           :: fock_diag_tmp(mo_tot_num)
+  double precision, intent(in)    :: E0(N_states)
+  double precision, intent(inout) :: pt2(N_states) 
+  double precision, intent(inout) :: variance(N_states) 
+  double precision, intent(inout) :: norm(N_states) 
+  type(selection_buffer), intent(inout) :: buf
+  logical :: ok
+  integer :: s1, s2, p1, p2, ib, j, istate
+  integer(bit_kind) :: mask(N_int, 2), det(N_int, 2)
+  double precision :: e_pert, delta_E, val, Hii, sum_e_pert, tmp, alpha_h_psi, coef
+  double precision, external :: diag_H_mat_elem_fock
+  double precision :: E_shift
+  
+  logical, external :: detEq
+  
+  if(sp == 3) then
+    s1 = 1
+    s2 = 2
+  else
+    s1 = sp
+    s2 = sp
+  end if
+  
+  call apply_holes(psi_det_generators(1,1,i_generator), s1, h1, s2, h2, mask, ok, N_int)
+  E_shift = 0.d0
+  
+  if (h0_type == "SOP") then
+    j = det_to_occ_pattern(i_generator)
+    E_shift = psi_det_Hii(i_generator) - psi_occ_pattern_Hii(j)
+  endif
+
+  do p1=1,mo_tot_num
+    if(bannedOrb(p1, s1)) cycle
+    ib = 1
+    if(sp /= 3) ib = p1+1
+    do p2=ib,mo_tot_num
+      if(bannedOrb(p2, s2)) cycle
+      if(banned(p1,p2)) cycle
+
+      if( sum(abs(mat(1:N_states, p1, p2))) == 0d0) cycle
+      call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+      
+      Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)
+
+      sum_e_pert = 0d0
+      
+      do istate=1,N_states
+        delta_E = E0(istate) - Hii + E_shift
+        alpha_h_psi = mat(istate, p1, p2) 
+        val = alpha_h_psi + alpha_h_psi
+        tmp = dsqrt(delta_E * delta_E + val * val)
+        if (delta_E < 0.d0) then
+            tmp = -tmp
+        endif
+        e_pert = 0.5d0 * (tmp - delta_E)
+        coef = alpha_h_psi / delta_E
+        pt2(istate) = pt2(istate) + e_pert
+        variance(istate) = variance(istate) + alpha_h_psi * alpha_h_psi 
+        norm(istate) = norm(istate) + coef * coef 
+
+        if (h0_type == "Variance") then
+          sum_e_pert = sum_e_pert - alpha_h_psi * alpha_h_psi * state_average_weight(istate)
+        else
+          sum_e_pert = sum_e_pert + e_pert * state_average_weight(istate)
+        endif
+      end do
+      
+      if(sum_e_pert <= buf%mini) then
+        call add_to_selection_buffer(buf, det, sum_e_pert)
+      end if
+    end do
+  end do
+end 
+
+
+subroutine splash_pq(mask, sp, det, i_gen, N_sel, bannedOrb, banned, mat, interesting)
+  use bitmasks
+  implicit none
+  
+  integer, intent(in)            :: sp, i_gen, N_sel
+  integer, intent(in)            :: interesting(0:N_sel)
+  integer(bit_kind),intent(in)   :: mask(N_int, 2), det(N_int, 2, N_sel)
+  logical, intent(inout)         :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num, 2)
+  double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num)
+  
+  integer                        :: i, ii, j, k, l, h(0:2,2), p(0:4,2), nt
+  integer(bit_kind)              :: perMask(N_int, 2), mobMask(N_int, 2), negMask(N_int, 2)
+  integer(bit_kind)             :: phasemask(N_int,2)
+
+  PROVIDE psi_selectors_coef_transp
+  mat = 0d0
+  
+  do i=1,N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  do i=1, N_sel ! interesting(0)
+    !i = interesting(ii)
+    if (interesting(i) < 0) then
+      stop 'prefetch interesting(i)'
+    endif
+
+    
+    mobMask(1,1) = iand(negMask(1,1), det(1,1,i))
+    mobMask(1,2) = iand(negMask(1,2), det(1,2,i))
+    nt = popcnt(mobMask(1, 1)) + popcnt(mobMask(1, 2))
+
+    if(nt > 4) cycle
+
+    do j=2,N_int
+      mobMask(j,1) = iand(negMask(j,1), det(j,1,i))
+      mobMask(j,2) = iand(negMask(j,2), det(j,2,i))
+      nt = nt + popcnt(mobMask(j, 1)) + popcnt(mobMask(j, 2))
+    end do
+
+    if(nt > 4) cycle
+    
+    if (interesting(i) == i_gen) then
+        if(sp == 3) then
+          do k=1,mo_tot_num
+            do j=1,mo_tot_num
+              banned(j,k,2) = banned(k,j,1)
+            enddo
+          enddo
+        else
+          do k=1,mo_tot_num
+          do l=k+1,mo_tot_num
+            banned(l,k,1) = banned(k,l,1)
+          end do
+          end do
+        end if
+    end if
+
+    call bitstring_to_list_in_selection(mobMask(1,1), p(1,1), p(0,1), N_int)
+    call bitstring_to_list_in_selection(mobMask(1,2), p(1,2), p(0,2), N_int)
+    
+    if (interesting(i) >= i_gen) then
+        perMask(1,1) = iand(mask(1,1), not(det(1,1,i)))
+        perMask(1,2) = iand(mask(1,2), not(det(1,2,i)))
+        do j=2,N_int
+          perMask(j,1) = iand(mask(j,1), not(det(j,1,i)))
+          perMask(j,2) = iand(mask(j,2), not(det(j,2,i)))
+        end do
+
+        call bitstring_to_list_in_selection(perMask(1,1), h(1,1), h(0,1), N_int)
+        call bitstring_to_list_in_selection(perMask(1,2), h(1,2), h(0,2), N_int)
+
+        call get_mask_phase(psi_det_sorted(1,1,interesting(i)), phasemask,N_int)
+        if(nt == 4) then
+          call get_d2(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        else if(nt == 3) then
+          call get_d1(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        else
+          call get_d0(det(1,1,i), phasemask, bannedOrb, banned, mat, mask, h, p, sp, psi_selectors_coef_transp(1, interesting(i)))
+        end if
+    else 
+        if(nt == 4) call past_d2(banned, p, sp)
+        if(nt == 3) call past_d1(bannedOrb, p)
+    end if
+  end do
+
+end 
+
+
+subroutine get_d2(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+  
+  double precision, external :: get_phase_bi, mo_bielec_integral
+  
+  integer :: i, j, k, tip, ma, mi, puti, putj
+  integer :: h1, h2, p1, p2, i1, i2
+  double precision :: hij, phase
+  
+  integer, parameter:: turn2d(2,3,4) = reshape((/0,0, 0,0, 0,0,  3,4, 0,0, 0,0,  2,4, 1,4, 0,0,  2,3, 1,3, 1,2 /), (/2,3,4/))
+  integer, parameter :: turn2(2) = (/2, 1/)
+  integer, parameter :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+  
+  integer :: bant
+  bant = 1
+
+  tip = p(0,1) * p(0,2)
+  
+  ma = sp
+  if(p(0,1) > p(0,2)) ma = 1
+  if(p(0,1) < p(0,2)) ma = 2
+  mi = mod(ma, 2) + 1
+  
+  if(sp == 3) then
+    if(ma == 2) bant = 2
+    
+    if(tip == 3) then
+      puti = p(1, mi)
+      if(bannedOrb(puti, mi)) return
+      do i = 1, 3
+        putj = p(i, ma)
+        if(banned(putj,puti,bant)) cycle
+        i1 = turn3(1,i)
+        i2 = turn3(2,i)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        h1 = h(1, ma)
+        h2 = h(2, ma)
+        
+        hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        if(ma == 1) then
+          do k=1,N_states
+            mat(k, putj, puti) = mat(k, putj, puti) +coefs(k) * hij
+          enddo
+        else
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+          enddo
+        end if
+      end do
+    else
+      h1 = h(1,1)
+      h2 = h(1,2)
+      do j = 1,2
+        putj = p(j, 2)
+        if(bannedOrb(putj, 2)) cycle
+        p2 = p(turn2(j), 2)
+        do i = 1,2
+          puti = p(i, 1)
+          
+          if(banned(puti,putj,bant) .or. bannedOrb(puti,1)) cycle
+          p1 = p(turn2(i), 1)
+          
+          hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          do k=1,N_states
+            mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+          enddo
+        end do
+      end do
+    end if
+
+  else
+    if(tip == 0) then
+      h1 = h(1, ma)
+      h2 = h(2, ma)
+      do i=1,3
+      puti = p(i, ma)
+      if(bannedOrb(puti,ma)) cycle
+      do j=i+1,4
+        putj = p(j, ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        
+        i1 = turn2d(1, i, j)
+        i2 = turn2d(2, i, j)
+        p1 = p(i1, ma)
+        p2 = p(i2, ma)
+        hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, ma, ma, h1, p1, h2, p2, N_int)
+        do k=1,N_states
+          mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+        enddo
+      end do
+      end do
+    else if(tip == 3) then
+      h1 = h(1, mi)
+      h2 = h(1, ma)
+      p1 = p(1, mi)
+      do i=1,3
+        puti = p(turn3(1,i), ma)
+        if(bannedOrb(puti,ma)) cycle
+        putj = p(turn3(2,i), ma)
+        if(bannedOrb(putj,ma)) cycle
+        if(banned(puti,putj,1)) cycle
+        p2 = p(i, ma)
+        
+        hij = mo_bielec_integral(p1, p2, h1, h2) * get_phase_bi(phasemask, mi, ma, h1, p1, h2, p2, N_int)
+        do k=1,N_states
+          mat(k, min(puti, putj), max(puti, putj)) = mat(k, min(puti, putj), max(puti, putj)) + coefs(k) * hij
+        enddo
+      end do
+    else ! tip == 4
+      puti = p(1, sp)
+      putj = p(2, sp)
+      if(.not. banned(puti,putj,1)) then
+        p1 = p(1, mi)
+        p2 = p(2, mi)
+        h1 = h(1, mi)
+        h2 = h(2, mi)
+        hij = (mo_bielec_integral(p1, p2, h1, h2) - mo_bielec_integral(p2,p1, h1, h2)) * get_phase_bi(phasemask, mi, mi, h1, p1, h2, p2, N_int)
+        do k=1,N_states
+          mat(k, puti, putj) = mat(k, puti, putj) +coefs(k) * hij
+        enddo
+      end if
+    end if
+  end if
+end 
+
+
+subroutine get_d1(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in)  :: mask(N_int, 2), gen(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in)            :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2)
+  integer(bit_kind)              :: det(N_int, 2)
+  double precision, intent(in)   :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num)
+  integer, intent(in)            :: h(0:2,2), p(0:4,2), sp
+  double precision               :: hij, tmp_row(N_states, mo_tot_num), tmp_row2(N_states, mo_tot_num)
+  double precision, external     :: get_phase_bi, mo_bielec_integral
+  logical                        :: ok
+
+  logical, allocatable           :: lbanned(:,:)
+  integer                        :: puti, putj, ma, mi, s1, s2, i, i1, i2, j
+  integer                        :: hfix, pfix, h1, h2, p1, p2, ib, k
+  
+  integer, parameter             :: turn2(2) = (/2,1/)
+  integer, parameter             :: turn3(2,3) = reshape((/2,3,  1,3, 1,2/), (/2,3/))
+  
+  integer                        :: bant
+  double precision, allocatable :: hij_cache(:,:)
+  PROVIDE mo_integrals_map N_int
+  
+  allocate (lbanned(mo_tot_num, 2))
+  allocate (hij_cache(mo_tot_num,2))
+  lbanned = bannedOrb
+    
+  do i=1, p(0,1)
+    lbanned(p(i,1), 1) = .true.
+  end do
+  do i=1, p(0,2)
+    lbanned(p(i,2), 2) = .true.
+  end do
+  
+  ma = 1
+  if(p(0,2) >= 2) ma = 2
+  mi = turn2(ma)
+  
+  bant = 1
+
+  if(sp == 3) then
+    !move MA
+    if(ma == 2) bant = 2
+    puti = p(1,mi)
+    hfix = h(1,ma)
+    p1 = p(1,ma)
+    p2 = p(2,ma)
+    if(.not. bannedOrb(puti, mi)) then
+      call get_mo_bielec_integrals(hfix,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+      call get_mo_bielec_integrals(hfix,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+      tmp_row = 0d0
+      do putj=1, hfix-1
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,1) - hij_cache(putj,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+          do k=1,N_states
+            tmp_row(k,putj) = tmp_row(k,putj) + hij * coefs(k)
+          enddo
+        endif
+      end do
+      do putj=hfix+1, mo_tot_num
+        if(lbanned(putj, ma)) cycle
+        if(banned(putj, puti,bant)) cycle
+        hij = hij_cache(putj,2) - hij_cache(putj,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+          tmp_row(1:N_states,putj) = tmp_row(1:N_states,putj) + hij * coefs(1:N_states)
+        endif
+      end do
+
+      if(ma == 1) then           
+        mat(1:N_states,1:mo_tot_num,puti) = mat(1:N_states,1:mo_tot_num,puti) + tmp_row(1:N_states,1:mo_tot_num)
+      else
+        mat(1:N_states,puti,1:mo_tot_num) = mat(1:N_states,puti,1:mo_tot_num) + tmp_row(1:N_states,1:mo_tot_num)
+      end if
+    end if
+
+    !MOVE MI
+    pfix = p(1,mi)
+    tmp_row = 0d0
+    tmp_row2 = 0d0
+    call get_mo_bielec_integrals(hfix,pfix,p1,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+    call get_mo_bielec_integrals(hfix,pfix,p2,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+    do puti=1,mo_tot_num
+      if(lbanned(puti,mi)) cycle
+      !p1 fixed
+      putj = p1
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,2)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p2, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)  ! HOTSPOT
+          enddo
+        endif
+      end if
+      
+      putj = p2
+      if(.not. banned(putj,puti,bant)) then
+        hij = hij_cache(puti,1)
+        if (hij /= 0.d0) then
+          hij = hij * get_phase_bi(phasemask, ma, mi, hfix, p1, puti, pfix, N_int)
+          do k=1,N_states
+            tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k) 
+          enddo
+        endif
+      end if
+    end do
+    
+    if(mi == 1) then
+      mat(:,:,p1) = mat(:,:,p1) + tmp_row(:,:)
+      mat(:,:,p2) = mat(:,:,p2) + tmp_row2(:,:)
+    else
+      mat(:,p1,:) = mat(:,p1,:) + tmp_row(:,:)
+      mat(:,p2,:) = mat(:,p2,:) + tmp_row2(:,:)
+    end if
+
+  else  ! sp /= 3
+
+    if(p(0,ma) == 3) then
+      do i=1,3
+        hfix = h(1,ma)
+        puti = p(i, ma)
+        p1 = p(turn3(1,i), ma)
+        p2 = p(turn3(2,i), ma)
+        call get_mo_bielec_integrals(hfix,p1,p2,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+        call get_mo_bielec_integrals(hfix,p2,p1,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+        tmp_row = 0d0
+        do putj=1,hfix-1
+          if(lbanned(putj,ma)) cycle
+          if(banned(putj,puti,1)) cycle
+          hij = hij_cache(putj,1) - hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, putj, p1, hfix, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+        do putj=hfix+1,mo_tot_num
+          if(lbanned(putj,ma)) cycle
+          if(banned(putj,puti,1)) cycle
+          hij = hij_cache(putj,2) - hij_cache(putj,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, ma, ma, hfix, p1, putj, p2, N_int)
+            tmp_row(:,putj) = tmp_row(:,putj) + hij * coefs(:)
+          endif
+        end do
+
+        mat(:, :puti-1, puti) = mat(:, :puti-1, puti) + tmp_row(:,:puti-1)
+        mat(:, puti, puti:) = mat(:, puti,puti:) + tmp_row(:,puti:)
+      end do
+    else
+      hfix = h(1,mi)
+      pfix = p(1,mi)
+      p1 = p(1,ma)
+      p2 = p(2,ma)
+      tmp_row = 0d0
+      tmp_row2 = 0d0
+      call get_mo_bielec_integrals(hfix,p1,pfix,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+      call get_mo_bielec_integrals(hfix,p2,pfix,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+      do puti=1,mo_tot_num
+        if(lbanned(puti,ma)) cycle
+        putj = p2
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,1)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p1, N_int)
+            do k=1,N_states
+              tmp_row(k,puti) = tmp_row(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+        
+        putj = p1
+        if(.not. banned(puti,putj,1)) then
+          hij = hij_cache(puti,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, mi, ma, hfix, pfix, puti, p2, N_int)
+            do k=1,N_states
+              tmp_row2(k,puti) = tmp_row2(k,puti) + hij * coefs(k)
+            enddo
+          endif
+        end if
+      end do
+      mat(:,:p2-1,p2) = mat(:,:p2-1,p2) + tmp_row(:,:p2-1)
+      mat(:,p2,p2:) = mat(:,p2,p2:) + tmp_row(:,p2:)
+      mat(:,:p1-1,p1) = mat(:,:p1-1,p1) + tmp_row2(:,:p1-1)
+      mat(:,p1,p1:) = mat(:,p1,p1:) + tmp_row2(:,p1:)
+    end if
+  end if
+  deallocate(lbanned,hij_cache)
+
+ !! MONO
+    if(sp == 3) then
+      s1 = 1
+      s2 = 2
+    else
+      s1 = sp
+      s2 = sp
+    end if
+
+    do i1=1,p(0,s1)
+      ib = 1
+      if(s1 == s2) ib = i1+1
+      p1 = p(i1,s1)
+      if(bannedOrb(p1, s1)) cycle
+      do i2=ib,p(0,s2)
+        p2 = p(i2,s2)
+        if(bannedOrb(p2, s2) .or. banned(p1, p2, 1)) cycle
+        call apply_particles(mask, s1, p1, s2, p2, det, ok, N_int)
+        call i_h_j(gen, det, N_int, hij)
+        mat(:, p1, p2) = mat(:, p1, p2) + coefs(:) * hij
+      end do
+    end do
+end 
+
+
+
+
+subroutine get_d0(gen, phasemask, bannedOrb, banned, mat, mask, h, p, sp, coefs)
+  use bitmasks
+  implicit none
+
+  integer(bit_kind), intent(in) :: gen(N_int, 2), mask(N_int, 2)
+  integer(bit_kind), intent(in) :: phasemask(N_int,2)
+  logical, intent(in) :: bannedOrb(mo_tot_num, 2), banned(mo_tot_num, mo_tot_num,2)
+  integer(bit_kind) :: det(N_int, 2)
+  double precision, intent(in) :: coefs(N_states)
+  double precision, intent(inout) :: mat(N_states, mo_tot_num, mo_tot_num)
+  integer, intent(in) :: h(0:2,2), p(0:4,2), sp
+  
+  integer :: i, j, k, s, h1, h2, p1, p2, puti, putj
+  double precision :: hij, phase
+  double precision, external :: get_phase_bi, mo_bielec_integral
+  logical :: ok
+  
+  integer :: bant
+  double precision, allocatable :: hij_cache(:,:)
+  allocate (hij_cache(mo_tot_num,2))
+  bant = 1
+  
+
+  if(sp == 3) then ! AB
+    h1 = p(1,1)
+    h2 = p(1,2)
+
+    do p2=1, mo_tot_num
+      if(bannedOrb(p2,2)) cycle
+      call get_mo_bielec_integrals(p2,h1,h2,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+      do p1=1, mo_tot_num
+        if(bannedOrb(p1, 1)) cycle
+        if(banned(p1, p2, bant)) cycle ! rentable?
+        if(p1 == h1 .or. p2 == h2) then
+          call apply_particles(mask, 1,p1,2,p2, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+        else
+          phase = get_phase_bi(phasemask, 1, 2, h1, p1, h2, p2, N_int)
+          hij = hij_cache(p1,1) * phase
+        end if
+        do k=1,N_states
+          mat(k, p1, p2) = mat(k, p1, p2) + coefs(k) * hij  ! HOTSPOT
+        enddo
+      end do
+    end do
+
+  else ! AA BB
+    p1 = p(1,sp)
+    p2 = p(2,sp)
+    do puti=1, mo_tot_num
+      if(bannedOrb(puti, sp)) cycle
+      call get_mo_bielec_integrals(puti,p2,p1,mo_tot_num,hij_cache(1,1),mo_integrals_map)
+      call get_mo_bielec_integrals(puti,p1,p2,mo_tot_num,hij_cache(1,2),mo_integrals_map)
+      do putj=puti+1, mo_tot_num
+        if(bannedOrb(putj, sp)) cycle
+        if(banned(puti, putj, bant)) cycle ! rentable?
+        if(puti == p1 .or. putj == p2 .or. puti == p2 .or. putj == p1) then
+          call apply_particles(mask, sp,puti,sp,putj, det, ok, N_int)
+          call i_h_j(gen, det, N_int, hij)
+          if (hij /= 0.d0) then
+            do k=1,N_states
+              mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            enddo
+          endif
+        else
+          hij = hij_cache(putj,1) -  hij_cache(putj,2)
+          if (hij /= 0.d0) then
+            hij = hij * get_phase_bi(phasemask, sp, sp, puti, p1 , putj, p2, N_int)
+            do k=1,N_states
+              mat(k, puti, putj) = mat(k, puti, putj) + coefs(k) * hij
+            enddo
+          endif
+        end if
+      end do
+    end do
+  end if
+  deallocate(hij_cache)
+end 
+ 
+
+subroutine past_d1(bannedOrb, p)
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: bannedOrb(mo_tot_num, 2)
+  integer, intent(in) :: p(0:4, 2)
+  integer :: i,s
+
+  do s = 1, 2
+    do i = 1, p(0, s)
+      bannedOrb(p(i, s), s) = .true.
+    end do
+  end do
+end 
+
+
+subroutine past_d2(banned, p, sp)
+  use bitmasks
+  implicit none
+
+  logical, intent(inout) :: banned(mo_tot_num, mo_tot_num)
+  integer, intent(in) :: p(0:4, 2), sp
+  integer :: i,j
+
+  if(sp == 3) then
+    do i=1,p(0,1)
+      do j=1,p(0,2)
+        banned(p(i,1), p(j,2)) = .true.
+      end do
+    end do
+  else
+    do i=1,p(0, sp)
+      do j=1,i-1
+        banned(p(j,sp), p(i,sp)) = .true.
+        banned(p(i,sp), p(j,sp)) = .true.
+      end do
+    end do
+  end if
+end 
+
+
+
+subroutine spot_isinwf(mask, det, i_gen, N, banned, fullMatch, interesting)
+  use bitmasks
+  implicit none
+  
+  integer, intent(in) :: i_gen, N
+  integer, intent(in) :: interesting(0:N)
+  integer(bit_kind),intent(in) :: mask(N_int, 2), det(N_int, 2, N)
+  logical, intent(inout) :: banned(mo_tot_num, mo_tot_num)
+  logical, intent(out) :: fullMatch
+
+
+  integer :: i, j, na, nb, list(3)
+  integer(bit_kind) :: myMask(N_int, 2), negMask(N_int, 2)
+
+  fullMatch = .false.
+
+  do i=1,N_int
+    negMask(i,1) = not(mask(i,1))
+    negMask(i,2) = not(mask(i,2))
+  end do
+
+  genl : do i=1, N
+    do j=1, N_int
+      if(iand(det(j,1,i), mask(j,1)) /= mask(j, 1)) cycle genl
+      if(iand(det(j,2,i), mask(j,2)) /= mask(j, 2)) cycle genl
+    end do
+
+    if(interesting(i) < i_gen) then
+      fullMatch = .true.
+      return
+    end if
+
+    do j=1, N_int
+      myMask(j, 1) = iand(det(j, 1, i), negMask(j, 1))
+      myMask(j, 2) = iand(det(j, 2, i), negMask(j, 2))
+    end do
+
+    call bitstring_to_list_in_selection(myMask(1,1), list(1), na, N_int)
+    call bitstring_to_list_in_selection(myMask(1,2), list(na+1), nb, N_int)
+    banned(list(1), list(2)) = .true.
+  end do genl
+end 
+
+
+subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  integer, intent(out)           :: list(Nint*bit_kind_size)
+  integer, intent(out)           :: n_elements
+  
+  integer                        :: i, ishift
+  integer(bit_kind)              :: l
+  
+  n_elements = 0
+  ishift = 2
+  do i=1,Nint
+    l = string(i)
+    do while (l /= 0_bit_kind)
+      n_elements = n_elements+1
+      list(n_elements) = ishift+popcnt(l-1_bit_kind) - popcnt(l)
+      l = iand(l,l-1_bit_kind)
+    enddo
+    ishift = ishift + bit_kind_size
+  enddo
+  
+end
diff --git a/src/fci/selection_buffer.irp.f b/src/fci/selection_buffer.irp.f
new file mode 100644
index 00000000..3053213f
--- /dev/null
+++ b/src/fci/selection_buffer.irp.f
@@ -0,0 +1,303 @@
+
+subroutine create_selection_buffer(N, siz_, res)
+  use selection_types
+  implicit none
+
+  integer, intent(in) :: N, siz_
+  type(selection_buffer), intent(out) :: res
+
+  integer :: siz
+  siz = max(siz_,1)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_double(siz)*(N_int*2+1)
+  call check_mem(rss,irp_here)
+
+  allocate(res%det(N_int, 2, siz), res%val(siz))
+
+  res%val(:) = 0d0
+  res%det(:,:,:) = 0_8
+  res%N = N
+  res%mini = 0d0
+  res%cur = 0
+end subroutine
+
+subroutine delete_selection_buffer(b)
+  use selection_types
+  implicit none
+  type(selection_buffer), intent(inout) :: b
+  if (associated(b%det)) then
+    deallocate(b%det)
+  endif
+  if (associated(b%val)) then
+    deallocate(b%val)
+  endif
+end
+
+  
+subroutine add_to_selection_buffer(b, det, val)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer(bit_kind), intent(in) :: det(N_int, 2)
+  double precision, intent(in) :: val
+  integer :: i
+
+  if(b%N > 0 .and. val <= b%mini) then
+    b%cur += 1
+    b%det(1:N_int,1:2,b%cur) = det(1:N_int,1:2)
+    b%val(b%cur) = val
+    if(b%cur == size(b%val)) then
+      call sort_selection_buffer(b)
+    end if
+  end if
+end subroutine
+
+subroutine merge_selection_buffers(b1, b2)
+  use selection_types
+  implicit none
+  BEGIN_DOC
+! Merges the selection buffers b1 and b2 into b2
+  END_DOC
+  type(selection_buffer), intent(inout) :: b1
+  type(selection_buffer), intent(inout) :: b2
+  integer(bit_kind), pointer     :: detmp(:,:,:)
+  double precision, pointer      :: val(:)
+  integer                        :: i, i1, i2, k, nmwen
+  if (b1%cur == 0) return
+  do while (b1%val(b1%cur) > b2%mini)
+    b1%cur = b1%cur-1
+    if (b1%cur == 0) then
+      return
+    endif
+  enddo
+  nmwen = min(b1%N, b1%cur+b2%cur)
+  double precision :: rss
+  double precision, external :: memory_of_double
+  rss = memory_of_double(size(b1%val)) + 2*N_int*memory_of_double(size(b1%det,3))
+  call check_mem(rss,irp_here)
+  allocate( val(size(b1%val)), detmp(N_int, 2, size(b1%det,3)) )
+  i1=1
+  i2=1
+  do i=1,nmwen
+    if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
+      exit 
+    else if (i1 > b1%cur) then
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+    else if (i2 > b2%cur) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+    else
+      if (b1%val(i1) <= b2%val(i2)) then
+        val(i) = b1%val(i1)
+        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
+        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
+        i1=i1+1
+      else
+        val(i) = b2%val(i2)
+        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
+        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
+        i2=i2+1
+      endif
+    endif
+  enddo
+  deallocate(b2%det, b2%val)
+  do i=nmwen+1,b2%N
+    val(i) = 0.d0
+    detmp(1:N_int,1:2,i) = 0_bit_kind
+  enddo
+  b2%det => detmp
+  b2%val => val
+  b2%mini = min(b2%mini,b2%val(b2%N))
+  b2%cur = nmwen
+end
+
+
+subroutine sort_selection_buffer(b)
+  use selection_types
+  implicit none
+
+  type(selection_buffer), intent(inout) :: b
+  integer, allocatable :: iorder(:)
+  integer(bit_kind), pointer :: detmp(:,:,:)
+  integer :: i, nmwen
+  logical, external :: detEq
+  if (b%N == 0 .or. b%cur == 0) return
+  nmwen = min(b%N, b%cur)
+
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3))
+  call check_mem(rss,irp_here)
+  allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)))
+  do i=1,b%cur
+    iorder(i) = i
+  end do
+  call dsort(b%val, iorder, b%cur)
+  do i=1, nmwen
+    detmp(1:N_int,1,i) = b%det(1:N_int,1,iorder(i))
+    detmp(1:N_int,2,i) = b%det(1:N_int,2,iorder(i))
+  end do
+  deallocate(b%det,iorder)
+  b%det => detmp
+  b%mini = min(b%mini,b%val(b%N))
+  b%cur = nmwen
+end subroutine
+
+subroutine make_selection_buffer_s2(b)
+  use selection_types
+  type(selection_buffer), intent(inout) :: b
+
+  integer(bit_kind), allocatable :: o(:,:,:)
+  double precision, allocatable  :: val(:)
+
+  integer :: n_d
+  integer :: i,k,sze,n_alpha,j,n
+  logical                        :: dup
+ 
+  ! Sort
+  integer, allocatable           :: iorder(:)
+  integer*8, allocatable         :: bit_tmp(:)
+  integer*8, external            :: occ_pattern_search_key
+  integer(bit_kind), allocatable :: tmp_array(:,:,:)
+  logical, allocatable           :: duplicate(:)
+
+  n_d = b%cur
+  double precision :: rss
+  double precision, external :: memory_of_double, memory_of_int
+  rss = (4*N_int+4)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(o(N_int,2,n_d), iorder(n_d), duplicate(n_d), bit_tmp(n_d), &
+           tmp_array(N_int,2,n_d), val(n_d) )
+
+  do i=1,n_d
+    do k=1,N_int
+      o(k,1,i) = ieor(b%det(k,1,i), b%det(k,2,i))
+      o(k,2,i) = iand(b%det(k,1,i), b%det(k,2,i))
+    enddo
+    iorder(i) = i
+    bit_tmp(i) = occ_pattern_search_key(o(1,1,i),N_int)
+  enddo
+
+  deallocate(b%det)
+
+  call i8sort(bit_tmp,iorder,n_d)
+
+  do i=1,n_d
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+    val(i) = b%val(iorder(i))
+    duplicate(i) = .False.
+  enddo
+  
+  ! Find duplicates
+  do i=1,n_d-1
+    if (duplicate(i)) then
+      cycle
+    endif
+    j = i+1
+    do while (bit_tmp(j)==bit_tmp(i))
+      if (duplicate(j)) then
+        j+=1
+        if (j>n_d) then
+          exit
+        endif
+        cycle
+      endif
+      dup = .True.
+      do k=1,N_int
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
+              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+          dup = .False.
+          exit
+        endif
+      enddo
+      if (dup) then
+!        val(i) = val(i) + val(j)
+        val(i) = max(val(i), val(j))
+        duplicate(j) = .True.
+      endif
+      j+=1
+      if (j>n_d) then
+        exit
+      endif
+    enddo
+  enddo
+  
+  deallocate (b%val)
+  ! Copy filtered result
+  integer :: n_p
+  n_p=0
+  do i=1,n_d
+    if (duplicate(i)) then
+      cycle
+    endif
+    n_p = n_p + 1
+    do k=1,N_int
+      o(k,1,n_p) = tmp_array(k,1,i)
+      o(k,2,n_p) = tmp_array(k,2,i)
+    enddo
+    val(n_p) = val(i)
+  enddo
+  
+  ! Sort by importance
+  do i=1,n_p
+    iorder(i) = i
+  end do
+  call dsort(val,iorder,n_p)
+  do i=1,n_p
+    do k=1,N_int
+      tmp_array(k,1,i) = o(k,1,iorder(i))
+      tmp_array(k,2,i) = o(k,2,iorder(i))
+    enddo
+  enddo
+  do i=1,n_p
+    do k=1,N_int
+      o(k,1,i) = tmp_array(k,1,i)
+      o(k,2,i) = tmp_array(k,2,i)
+    enddo
+  enddo
+  
+  ! Create determinants
+  n_d = 0
+  do i=1,n_p
+    call occ_pattern_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int)
+    n_d = n_d + sze
+    if (n_d > b%cur) then
+!      if (n_d - b%cur > b%cur - n_d + sze) then
+!        n_d = n_d - sze
+!      endif
+      exit
+    endif
+  enddo
+
+  rss = (4*N_int+2)*memory_of_double(n_d)
+  call check_mem(rss,irp_here)
+  allocate(b%det(N_int,2,2*n_d), b%val(2*n_d))
+  k=1
+  do i=1,n_p
+    n=n_d
+    call occ_pattern_to_dets_size(o(1,1,i),n,elec_alpha_num,N_int)
+    call occ_pattern_to_dets(o(1,1,i),b%det(1,1,k),n,elec_alpha_num,N_int)
+    do j=k,k+n-1
+      b%val(j) = val(i)
+    enddo
+    k = k+n
+    if (k > n_d) exit
+  enddo
+  deallocate(o)
+  b%N = 2*n_d
+  b%cur = n_d
+end
+
+
+
diff --git a/src/fci/selection_types.f90 b/src/fci/selection_types.f90
new file mode 100644
index 00000000..29e48524
--- /dev/null
+++ b/src/fci/selection_types.f90
@@ -0,0 +1,9 @@
+module selection_types
+  type selection_buffer
+    integer :: N, cur
+    integer(8)      , pointer :: det(:,:,:)
+    double precision, pointer :: val(:)
+    double precision          :: mini
+  endtype
+end module
+
diff --git a/src/fci/zmq_selection.irp.f b/src/fci/zmq_selection.irp.f
new file mode 100644
index 00000000..ca1e87e9
--- /dev/null
+++ b/src/fci/zmq_selection.irp.f
@@ -0,0 +1,219 @@
+subroutine ZMQ_selection(N_in, pt2, variance, norm)
+  use f77_zmq
+  use selection_types
+  
+  implicit none
+  
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket , zmq_socket_pull
+  integer, intent(in)            :: N_in
+  type(selection_buffer)         :: b
+  integer                        :: i, N
+  integer, external              :: omp_get_thread_num
+  double precision, intent(out)  :: pt2(N_states)
+  double precision, intent(out)  :: variance(N_states)
+  double precision, intent(out)  :: norm(N_states)
+  
+!  PROVIDE psi_det psi_coef N_det qp_max_mem N_states pt2_F s2_eig N_det_generators
+  
+  N = max(N_in,1)
+  if (.True.) then
+    PROVIDE pt2_e0_denominator nproc
+    PROVIDE psi_bilinear_matrix_columns_loc psi_det_alpha_unique psi_det_beta_unique
+    PROVIDE psi_bilinear_matrix_rows psi_det_sorted_order psi_bilinear_matrix_order
+    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
+    PROVIDE psi_bilinear_matrix_transp_order 
+
+    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection')
+
+    integer, external              :: zmq_put_psi
+    integer, external              :: zmq_put_N_det_generators
+    integer, external              :: zmq_put_N_det_selectors
+    integer, external              :: zmq_put_dvector
+
+    if (zmq_put_psi(zmq_to_qp_run_socket,1) == -1) then
+      stop 'Unable to put psi on ZMQ server'
+    endif
+    if (zmq_put_N_det_generators(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_generators on ZMQ server'
+    endif
+    if (zmq_put_N_det_selectors(zmq_to_qp_run_socket, 1) == -1) then
+      stop 'Unable to put N_det_selectors on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'energy',pt2_e0_denominator,size(pt2_e0_denominator)) == -1) then
+      stop 'Unable to put energy on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_selectors',threshold_selectors,1) == -1) then
+      stop 'Unable to put threshold_selectors on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'state_average_weight',state_average_weight,N_states) == -1) then
+      stop 'Unable to put state_average_weight on ZMQ server'
+    endif
+    if (zmq_put_dvector(zmq_to_qp_run_socket,1,'threshold_generators',threshold_generators,1) == -1) then
+      stop 'Unable to put threshold_generators on ZMQ server'
+    endif
+    call create_selection_buffer(N, N*2, b)
+  endif
+
+  integer, external :: add_task_to_taskserver
+  character(len=100000)           :: task
+  integer :: j,k,ipos
+  ipos=1
+  task = ' ' 
+
+ do i= 1, N_det_generators
+    do j=1,pt2_F(i)
+      write(task(ipos:ipos+30),'(I9,1X,I9,1X,I9,''|'')') j, i, N
+      ipos += 30
+      if (ipos > 100000-30) then
+        if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+          stop 'Unable to add task to task server'
+        endif
+        ipos=1
+      endif
+    end do
+  enddo
+  if (ipos > 1) then
+    if (add_task_to_taskserver(zmq_to_qp_run_socket,trim(task(1:ipos))) == -1) then
+      stop 'Unable to add task to task server'
+    endif
+  endif
+  
+
+  ASSERT (associated(b%det))
+  ASSERT (associated(b%val))
+
+  integer, external :: zmq_set_running
+  if (zmq_set_running(zmq_to_qp_run_socket) == -1) then
+    print *,  irp_here, ': Failed in zmq_set_running'
+  endif
+
+  integer :: nproc_target
+  nproc_target = nproc
+  double precision :: mem
+  mem = 8.d0 * N_det * (N_int * 2.d0 * 3.d0 +  3.d0 + 5.d0) / (1024.d0**3)
+  call write_double(6,mem,'Estimated memory/thread (Gb)')
+  if (qp_max_mem > 0) then
+    nproc_target = max(1,int(dble(qp_max_mem)/mem))
+    nproc_target = min(nproc_target,nproc)
+  endif
+
+  f(:) = 1.d0
+  if (.not.do_pt2) then
+  double precision :: f(N_states), u_dot_u
+    do k=1,min(N_det,N_states)
+     f(k) = 1.d0 / u_dot_u(psi_selectors_coef(1,k), N_det_selectors)
+    enddo
+  endif
+
+  !$OMP PARALLEL DEFAULT(shared)  SHARED(b, pt2, variance, norm)  PRIVATE(i) NUM_THREADS(nproc_target+1)
+  i = omp_get_thread_num()
+  if (i==0) then
+    call selection_collector(zmq_socket_pull, b, N, pt2, variance, norm)
+  else
+    call selection_slave_inproc(i)
+  endif
+  !$OMP END PARALLEL
+  call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'selection')
+  do i=N_det+1,N_states
+    pt2(i) = 0.d0
+    variance(i) = 0.d0
+    norm(i) = 0.d0
+  enddo
+  if (N_in > 0) then
+    if (s2_eig) then
+      call make_selection_buffer_s2(b)
+    endif
+    call fill_H_apply_buffer_no_selection(b%cur,b%det,N_int,0) 
+    call copy_H_apply_buffer_to_wf()
+    call save_wavefunction
+  endif
+  call delete_selection_buffer(b)
+  do k=1,N_states
+    pt2(k) = pt2(k) * f(k)
+    variance(k) = variance(k) * f(k)
+    norm(k) = norm(k) * f(k)
+  enddo
+
+end subroutine
+
+
+subroutine selection_slave_inproc(i)
+  implicit none
+  integer, intent(in)            :: i
+
+  call run_selection_slave(1,i,pt2_e0_denominator)
+end
+
+subroutine selection_collector(zmq_socket_pull, b, N, pt2, variance, norm)
+  use f77_zmq
+  use selection_types
+  use bitmasks
+  implicit none
+
+
+  integer(ZMQ_PTR), intent(in)   :: zmq_socket_pull
+  type(selection_buffer), intent(inout) :: b
+  integer, intent(in) :: N
+  double precision, intent(inout)    :: pt2(N_states)
+  double precision, intent(inout)    :: variance(N_states)
+  double precision, intent(inout)    :: norm(N_states)
+  double precision                   :: pt2_mwen(N_states)
+  double precision                   :: variance_mwen(N_states)
+  double precision                   :: norm_mwen(N_states)
+  integer(ZMQ_PTR),external      :: new_zmq_to_qp_run_socket
+  integer(ZMQ_PTR)               :: zmq_to_qp_run_socket
+
+  integer(ZMQ_PTR), external     :: new_zmq_pull_socket
+
+  integer :: msg_size, rc, more
+  integer :: acc, i, j, robin, ntask
+  double precision, pointer :: val(:)
+  integer(bit_kind), pointer :: det(:,:,:)
+  integer, allocatable :: task_id(:)
+  type(selection_buffer) :: b2
+  
+  
+  
+
+  zmq_to_qp_run_socket = new_zmq_to_qp_run_socket()
+  call create_selection_buffer(N, N*2, b2)
+  double precision :: rss
+  double precision, external :: memory_of_int
+  rss = memory_of_int(N_det_generators)
+  call check_mem(rss,irp_here)
+  allocate(task_id(N_det_generators))
+  more = 1
+  pt2(:)           = 0d0
+  variance(:)      = 0.d0
+  norm(:)          = 0.d0
+  pt2_mwen(:)      = 0.d0
+  variance_mwen(:) = 0.d0
+  norm_mwen(:)     = 0.d0
+  do while (more == 1)
+    call pull_selection_results(zmq_socket_pull, pt2_mwen, variance_mwen, norm_mwen, b2%val(1), b2%det(1,1,1), b2%cur, task_id, ntask)
+
+    pt2(:) += pt2_mwen(:)
+    variance(:) += variance_mwen(:)
+    norm(:) += norm_mwen(:)
+    do i=1, b2%cur
+      call add_to_selection_buffer(b, b2%det(1,1,i), b2%val(i))
+      if (b2%val(i) > b%mini) exit
+    end do
+
+    do i=1, ntask
+      if(task_id(i) == 0) then
+          print *,  "Error in collector"
+      endif
+      integer, external :: zmq_delete_task
+      if (zmq_delete_task(zmq_to_qp_run_socket,zmq_socket_pull,task_id(i),more) == -1) then
+        stop 'Unable to delete task'
+      endif
+    end do
+  end do
+
+
+  call delete_selection_buffer(b2)
+  call sort_selection_buffer(b)
+  call end_zmq_to_qp_run_socket(zmq_to_qp_run_socket)
+end subroutine
+
diff --git a/src/four_idx/NEED b/src/four_idx/NEED
new file mode 100644
index 00000000..7ba1092b
--- /dev/null
+++ b/src/four_idx/NEED
@@ -0,0 +1 @@
+integrals_bielec
diff --git a/src/four_idx/README.rst b/src/four_idx/README.rst
new file mode 100644
index 00000000..6ea432c6
--- /dev/null
+++ b/src/four_idx/README.rst
@@ -0,0 +1,6 @@
+=======
+FourIdx 
+=======
+
+Four-index transformation.
+
diff --git a/src/four_idx/four_idx_transform.irp.f b/src/four_idx/four_idx_transform.irp.f
new file mode 100644
index 00000000..a98ce2aa
--- /dev/null
+++ b/src/four_idx/four_idx_transform.irp.f
@@ -0,0 +1,12 @@
+program four_idx
+  implicit none
+  BEGIN_DOC
+! 4-index transformation from AO to MO integrals
+  END_DOC
+  
+  disk_access_mo_integrals = 'Write'
+  SOFT_TOUCH disk_access_mo_integrals
+  if (.true.) then
+    PROVIDE mo_bielec_integrals_in_map
+  endif
+end
diff --git a/src/hartree_fock/EZFIO.cfg b/src/hartree_fock/EZFIO.cfg
new file mode 100644
index 00000000..22d2447e
--- /dev/null
+++ b/src/hartree_fock/EZFIO.cfg
@@ -0,0 +1,53 @@
+[max_dim_diis]
+type: integer
+doc: Maximum size of the |DIIS| extrapolation procedure
+interface: ezfio,provider,ocaml
+default: 15
+
+[threshold_diis]
+type: Threshold
+doc: Threshold on the convergence of the |DIIS| error vector during a Hartree-Fock calculation. If 0. is chosen, the square root of thresh_scf will be used.
+interface: ezfio,provider,ocaml
+default: 0.
+
+[thresh_scf]
+type: Threshold
+doc: Threshold on the convergence of the Hartree Fock energy. 
+interface: ezfio,provider,ocaml
+default: 1.e-10
+
+[n_it_scf_max]
+type: Strictly_positive_int
+doc: Maximum number of |SCF| iterations
+interface: ezfio,provider,ocaml
+default: 500
+
+[level_shift]
+type: Positive_float
+doc: Initial value of the energy shift on the virtual |MOs|
+interface: ezfio,provider,ocaml
+default: 0.0
+
+[scf_algorithm]
+type: character*(32)
+doc: Type of |SCF| algorithm used. Possible choices are [ Simple | DIIS]
+interface: ezfio,provider,ocaml
+default: DIIS
+
+[mo_guess_type]
+type: MO_guess
+doc: Initial MO guess. Can be [ Huckel | HCore ]
+interface: ezfio,provider,ocaml
+default: Huckel
+
+[energy]
+type: double precision
+doc: Calculated HF energy
+interface: ezfio
+
+[no_oa_or_av_opt]
+type: logical
+doc: If |true|, skip the (inactive+core) --> (active) and the (active) --> (virtual) orbital rotations within the |SCF| procedure
+interface: ezfio,provider,ocaml
+default: False
+
diff --git a/src/hartree_fock/NEED b/src/hartree_fock/NEED
new file mode 100644
index 00000000..24ae5fc0
--- /dev/null
+++ b/src/hartree_fock/NEED
@@ -0,0 +1,4 @@
+integrals_bielec
+ao_one_e_integrals
+mo_guess
+bitmask
diff --git a/src/hartree_fock/README.rst b/src/hartree_fock/README.rst
new file mode 100644
index 00000000..373a611d
--- /dev/null
+++ b/src/hartree_fock/README.rst
@@ -0,0 +1,33 @@
+============
+Hartree-Fock
+============
+
+
+The Hartree-Fock module performs *Restricted* Hartree-Fock calculations (the
+spatial part of the |MOs| is common for alpha and beta spinorbitals).
+
+The Hartree-Fock program does the following:
+
+#. Compute/Read all the one- and two-electron integrals, and store them in memory
+#. Check in the |EZFIO| database if there is a set of |MOs|. If there is, it
+   will read them as initial guess. Otherwise, it will create a guess.
+#. Perform the |SCF| iterations
+
+At each iteration, the |MOs| are saved in the |EZFIO| database. Hence, if the calculation
+crashes for any unexpected reason, the calculation can be restarted by running again
+the |SCF| with the same |EZFIO| database.
+
+The `DIIS`_ algorithm is implemented, as well as the `level-shifting`_ method.
+If the |SCF| does not converge, try again with a higher value of :option:`level_shift`.
+
+To start a calculation from scratch, the simplest way is to remove the
+``mo_basis`` directory from the |EZFIO| database, and run the |SCF| again.
+
+
+
+
+.. _DIIS: https://en.wikipedia.org/w/index.php?title=DIIS
+.. _level-shifting: https://doi.org/10.1002/qua.560070407
+
+
+
diff --git a/src/hartree_fock/diagonalize_fock.irp.f b/src/hartree_fock/diagonalize_fock.irp.f
new file mode 100644
index 00000000..2d198f62
--- /dev/null
+++ b/src/hartree_fock/diagonalize_fock.irp.f
@@ -0,0 +1,100 @@
+BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_mo, (ao_num,mo_tot_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Eigenvector of the Fock matrix in the MO basis obtained with level shift.
+   END_DOC
+   
+   integer                        :: i,j
+   integer                        :: liwork, lwork, n, info
+   integer, allocatable           :: iwork(:)
+   double precision, allocatable  :: work(:), F(:,:), S(:,:)
+   double precision, allocatable  :: diag(:) 
+   
+   
+   allocate( F(mo_tot_num,mo_tot_num) )
+   allocate (diag(mo_tot_num) )
+
+   do j=1,mo_tot_num
+     do i=1,mo_tot_num
+       F(i,j) = Fock_matrix_mo(i,j)
+     enddo
+   enddo
+   if(no_oa_or_av_opt)then
+     integer                        :: iorb,jorb
+     do i = 1, n_act_orb
+       iorb = list_act(i)
+       do j = 1, n_inact_orb
+         jorb = list_inact(j)
+         F(iorb,jorb) = 0.d0
+         F(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_virt_orb
+         jorb = list_virt(j)
+         F(iorb,jorb) = 0.d0
+         F(jorb,iorb) = 0.d0
+       enddo
+       do j = 1, n_core_orb
+         jorb = list_core(j)
+         F(iorb,jorb) = 0.d0
+         F(jorb,iorb) = 0.d0
+       enddo
+     enddo
+   endif
+   
+   
+   ! Insert level shift here
+   do i = elec_beta_num+1, elec_alpha_num
+     F(i,i) += 0.5d0*level_shift
+   enddo
+   
+   do i = elec_alpha_num+1, mo_tot_num
+     F(i,i) += level_shift
+   enddo
+   
+   n = mo_tot_num
+   lwork = 1+6*n + 2*n*n
+   liwork = 3 + 5*n
+   
+   allocate(work(lwork))
+   allocate(iwork(liwork) )
+   
+   lwork = -1
+   liwork = -1
+   
+   call dsyevd( 'V', 'U', mo_tot_num, F,                             &
+       size(F,1), diag, work, lwork, iwork, liwork, info)
+   
+   if (info /= 0) then
+     print *,  irp_here//' DSYEVD failed : ', info
+     stop 1
+   endif
+   lwork = int(work(1))
+   liwork = iwork(1)
+   deallocate(iwork)
+   deallocate(work)
+   
+   allocate(work(lwork))
+   allocate(iwork(liwork) )
+   call dsyevd( 'V', 'U', mo_tot_num, F,                             &
+       size(F,1), diag, work, lwork, iwork, liwork, info)
+   deallocate(iwork)
+   
+   
+   if (info /= 0) then
+     call dsyev( 'V', 'L', mo_tot_num, F,                            &
+         size(F,1), diag, work, lwork, info)
+     
+     if (info /= 0) then
+       print *,  irp_here//' DSYEV failed : ', info
+       stop 1
+     endif
+   endif
+   
+   call dgemm('N','N',ao_num,mo_tot_num,mo_tot_num, 1.d0,            &
+       mo_coef, size(mo_coef,1), F, size(F,1),                       &
+       0.d0, eigenvectors_Fock_matrix_mo, size(eigenvectors_Fock_matrix_mo,1))
+   deallocate(work, F, diag)
+   
+
+END_PROVIDER
+ 
diff --git a/src/hartree_fock/diis.irp.f b/src/hartree_fock/diis.irp.f
new file mode 100644
index 00000000..6ebb5879
--- /dev/null
+++ b/src/hartree_fock/diis.irp.f
@@ -0,0 +1,139 @@
+BEGIN_PROVIDER [ double precision, threshold_DIIS_nonzero ]
+ implicit none
+ BEGIN_DOC
+ ! If threshold_DIIS is zero, choose sqrt(thresh_scf)
+ END_DOC
+ if (threshold_DIIS == 0.d0) then
+   threshold_DIIS_nonzero = dsqrt(thresh_scf)
+ else
+   threshold_DIIS_nonzero = threshold_DIIS
+ endif
+ ASSERT (threshold_DIIS_nonzero >= 0.d0)
+
+END_PROVIDER
+
+BEGIN_PROVIDER [double precision, FPS_SPF_Matrix_AO, (AO_num, AO_num)]
+  implicit none
+  BEGIN_DOC
+  !   Commutator FPS - SPF
+  END_DOC
+  double precision, allocatable  :: scratch(:,:)
+  allocate(                                                          &
+      scratch(AO_num, AO_num)                                  &
+      )
+  
+  ! Compute FP
+  
+  call dgemm('N','N',AO_num,AO_num,AO_num,                           &
+      1.d0,                                                          &
+      Fock_Matrix_AO,Size(Fock_Matrix_AO,1),                         &
+      HF_Density_Matrix_AO,Size(HF_Density_Matrix_AO,1),             &
+      0.d0,                                                          &
+      scratch,Size(scratch,1))
+  
+  ! Compute FPS
+  
+  call dgemm('N','N',AO_num,AO_num,AO_num,                           &
+      1.d0,                                                          &
+      scratch,Size(scratch,1),                                       &
+      AO_Overlap,Size(AO_Overlap,1),                                 &
+      0.d0,                                                          &
+      FPS_SPF_Matrix_AO,Size(FPS_SPF_Matrix_AO,1))
+  
+  ! Compute SP
+  
+  call dgemm('N','N',AO_num,AO_num,AO_num,                           &
+      1.d0,                                                          &
+      AO_Overlap,Size(AO_Overlap,1),                                 &
+      HF_Density_Matrix_AO,Size(HF_Density_Matrix_AO,1),             &
+      0.d0,                                                          &
+      scratch,Size(scratch,1))
+  
+  ! Compute FPS - SPF
+  
+  call dgemm('N','N',AO_num,AO_num,AO_num,                           &
+      -1.d0,                                                         &
+      scratch,Size(scratch,1),                                       &
+      Fock_Matrix_AO,Size(Fock_Matrix_AO,1),                         &
+      1.d0,                                                          &
+      FPS_SPF_Matrix_AO,Size(FPS_SPF_Matrix_AO,1))
+
+END_PROVIDER
+
+bEGIN_PROVIDER [double precision, FPS_SPF_Matrix_MO, (mo_tot_num, mo_tot_num)]
+  implicit none
+  begin_doc 
+!   Commutator FPS - SPF in MO basis
+  end_doc
+  call ao_to_mo(FPS_SPF_Matrix_AO, size(FPS_SPF_Matrix_AO,1), &
+     FPS_SPF_Matrix_MO, size(FPS_SPF_Matrix_MO,1))
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, eigenvalues_Fock_matrix_AO, (AO_num) ]
+&BEGIN_PROVIDER [ double precision, eigenvectors_Fock_matrix_AO, (AO_num,AO_num) ]
+
+   BEGIN_DOC
+   ! Eigenvalues and eigenvectors of the Fock matrix over the AO basis
+   END_DOC
+
+   implicit none
+   
+   double precision, allocatable  :: scratch(:,:),work(:),Xt(:,:)
+   integer                        :: lwork,info
+   integer                        :: i,j
+   
+   lwork = 3*AO_num - 1
+   allocate(                                                         &
+       scratch(AO_num,AO_num),                                 &
+       work(lwork),                                                  &
+       Xt(AO_num,AO_num)                                             &
+       )
+ 
+! Calculate Xt
+
+  do i=1,AO_num
+    do j=1,AO_num
+      Xt(i,j) = S_half_inv(j,i)
+    enddo
+  enddo
+
+! Calculate Fock matrix in orthogonal basis: F' = Xt.F.X
+
+  call dgemm('N','N',AO_num,AO_num,AO_num,     &
+       1.d0,                                   &
+       Fock_matrix_AO,size(Fock_matrix_AO,1),  &
+       S_half_inv,size(S_half_inv,1),        &
+       0.d0,                                   &
+       eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1))       
+
+  call dgemm('N','N',AO_num,AO_num,AO_num,                              &
+       1.d0,                                                            &
+       Xt,size(Xt,1),                                                   &
+       eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1), &
+       0.d0,                                                            &
+       scratch,size(scratch,1))
+     
+! Diagonalize F' to obtain eigenvectors in orthogonal basis C' and eigenvalues
+  
+   call dsyev('V','U',AO_num,       &
+        scratch,size(scratch,1),    &
+        eigenvalues_Fock_matrix_AO, &
+        work,lwork,info)
+ 
+   if(info /= 0) then
+     print *,  irp_here//' failed : ', info
+     stop 1
+   endif
+
+! Back-transform eigenvectors: C =X.C'
+
+  call dgemm('N','N',AO_num,AO_num,AO_num,     &
+       1.d0,                                   &
+       S_half_inv,size(S_half_inv,1),        &
+       scratch,size(scratch,1),                &
+       0.d0,                                   &
+       eigenvectors_Fock_matrix_AO,size(eigenvectors_Fock_matrix_AO,1))       
+   
+END_PROVIDER
+
diff --git a/src/hartree_fock/fock_matrix.irp.f b/src/hartree_fock/fock_matrix.irp.f
new file mode 100644
index 00000000..7f473f7a
--- /dev/null
+++ b/src/hartree_fock/fock_matrix.irp.f
@@ -0,0 +1,320 @@
+ BEGIN_PROVIDER [ double precision, Fock_matrix_mo, (mo_tot_num,mo_tot_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_diag_mo, (mo_tot_num)]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis.
+   ! For open shells, the ROHF Fock Matrix is
+   !
+   !  |   F-K    |  F + K/2  |    F     |
+   !  |---------------------------------|
+   !  | F + K/2  |     F     |  F - K/2 |
+   !  |---------------------------------|
+   !  |    F     |  F - K/2  |  F + K   |
+   !
+   ! F = 1/2 (Fa + Fb)
+   !
+   ! K = Fb - Fa
+   !
+   END_DOC
+   integer                        :: i,j,n
+   if (elec_alpha_num == elec_beta_num) then
+     Fock_matrix_mo = Fock_matrix_mo_alpha
+   else
+     
+     do j=1,elec_beta_num
+       ! F-K
+       do i=1,elec_beta_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))&
+             - (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+       ! F+K/2
+       do i=elec_beta_num+1,elec_alpha_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_alpha_num+1, mo_tot_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))
+       enddo
+     enddo
+
+     do j=elec_beta_num+1,elec_alpha_num
+       ! F+K/2
+       do i=1,elec_beta_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))&
+             + 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+       ! F
+       do i=elec_beta_num+1,elec_alpha_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_alpha_num+1, mo_tot_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+     enddo
+
+     do j=elec_alpha_num+1, mo_tot_num
+       ! F
+       do i=1,elec_beta_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))
+       enddo
+       ! F-K/2
+       do i=elec_beta_num+1,elec_alpha_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j))&
+             - 0.5d0*(Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+       ! F+K
+       do i=elec_alpha_num+1,mo_tot_num
+         Fock_matrix_mo(i,j) = 0.5d0*(Fock_matrix_mo_alpha(i,j)+Fock_matrix_mo_beta(i,j)) &
+             + (Fock_matrix_mo_beta(i,j) - Fock_matrix_mo_alpha(i,j))
+       enddo
+     enddo
+     
+   endif
+
+   do i = 1, mo_tot_num
+     Fock_matrix_diag_mo(i) = Fock_matrix_mo(i,i) 
+   enddo
+END_PROVIDER
+ 
+ 
+ 
+ BEGIN_PROVIDER [ double precision, Fock_matrix_ao_alpha, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, Fock_matrix_ao_beta,  (ao_num, ao_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Alpha Fock matrix in AO basis set
+ END_DOC
+ 
+ integer                        :: i,j
+ do j=1,ao_num
+   do i=1,ao_num
+     Fock_matrix_ao_alpha(i,j) = ao_mono_elec_integral(i,j) + ao_bi_elec_integral_alpha(i,j)
+     Fock_matrix_ao_beta (i,j) = ao_mono_elec_integral(i,j) + ao_bi_elec_integral_beta (i,j)
+   enddo
+ enddo
+
+END_PROVIDER
+
+
+ BEGIN_PROVIDER [ double precision, ao_bi_elec_integral_alpha, (ao_num, ao_num) ]
+&BEGIN_PROVIDER [ double precision, ao_bi_elec_integral_beta ,  (ao_num, ao_num) ]
+ use map_module
+ implicit none
+ BEGIN_DOC
+ ! Alpha Fock matrix in AO basis set
+ END_DOC
+ 
+ integer                        :: i,j,k,l,k1,r,s
+ integer                        :: i0,j0,k0,l0
+ integer*8                      :: p,q
+ double precision               :: integral, c0, c1, c2
+ double precision               :: ao_bielec_integral, local_threshold
+ double precision, allocatable  :: ao_bi_elec_integral_alpha_tmp(:,:)
+ double precision, allocatable  :: ao_bi_elec_integral_beta_tmp(:,:)
+
+ ao_bi_elec_integral_alpha = 0.d0
+ ao_bi_elec_integral_beta  = 0.d0
+ if (do_direct_integrals) then
+
+   !$OMP PARALLEL DEFAULT(NONE)                                      &
+       !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,p,q,r,s,i0,j0,k0,l0, &
+       !$OMP ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp, c0, c1, c2, &
+       !$OMP local_threshold)&
+       !$OMP SHARED(ao_num,HF_density_matrix_ao_alpha,HF_density_matrix_ao_beta,&
+       !$OMP ao_integrals_map,ao_integrals_threshold, ao_bielec_integral_schwartz, &
+       !$OMP ao_overlap_abs, ao_bi_elec_integral_alpha, ao_bi_elec_integral_beta)
+
+   allocate(keys(1), values(1))
+   allocate(ao_bi_elec_integral_alpha_tmp(ao_num,ao_num), &
+            ao_bi_elec_integral_beta_tmp(ao_num,ao_num))
+   ao_bi_elec_integral_alpha_tmp = 0.d0
+   ao_bi_elec_integral_beta_tmp  = 0.d0
+
+   q = ao_num*ao_num*ao_num*ao_num
+   !$OMP DO SCHEDULE(dynamic)
+   do p=1_8,q
+           call bielec_integrals_index_reverse(kk,ii,ll,jj,p)
+           if ( (kk(1)>ao_num).or. &
+                (ii(1)>ao_num).or. &
+                (jj(1)>ao_num).or. &
+                (ll(1)>ao_num) ) then
+                cycle
+           endif
+           k = kk(1)
+           i = ii(1)
+           l = ll(1)
+           j = jj(1)
+
+           if (ao_overlap_abs(k,l)*ao_overlap_abs(i,j)  &
+              < ao_integrals_threshold) then
+             cycle
+           endif
+           local_threshold = ao_bielec_integral_schwartz(k,l)*ao_bielec_integral_schwartz(i,j)
+           if (local_threshold < ao_integrals_threshold) then
+             cycle
+           endif
+           i0 = i
+           j0 = j
+           k0 = k
+           l0 = l
+           values(1) = 0.d0
+           local_threshold = ao_integrals_threshold/local_threshold
+           do k2=1,8
+             if (kk(k2)==0) then
+               cycle
+             endif
+             i = ii(k2)
+             j = jj(k2)
+             k = kk(k2)
+             l = ll(k2)
+             c0 = HF_density_matrix_ao_alpha(k,l)+HF_density_matrix_ao_beta(k,l)
+             c1 = HF_density_matrix_ao_alpha(k,i)
+             c2 = HF_density_matrix_ao_beta(k,i)
+             if ( dabs(c0)+dabs(c1)+dabs(c2) < local_threshold) then
+               cycle
+             endif
+             if (values(1) == 0.d0) then
+               values(1) = ao_bielec_integral(k0,l0,i0,j0)
+             endif
+             integral = c0 * values(1)
+             ao_bi_elec_integral_alpha_tmp(i,j) += integral
+             ao_bi_elec_integral_beta_tmp (i,j) += integral
+             integral = values(1)
+             ao_bi_elec_integral_alpha_tmp(l,j) -= c1 * integral
+             ao_bi_elec_integral_beta_tmp (l,j) -= c2 * integral
+           enddo
+   enddo
+   !$OMP END DO NOWAIT
+   !$OMP CRITICAL
+   ao_bi_elec_integral_alpha += ao_bi_elec_integral_alpha_tmp
+   !$OMP END CRITICAL
+   !$OMP CRITICAL
+   ao_bi_elec_integral_beta  += ao_bi_elec_integral_beta_tmp
+   !$OMP END CRITICAL
+   deallocate(keys,values,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp)
+   !$OMP END PARALLEL
+ else
+   PROVIDE ao_bielec_integrals_in_map 
+           
+   integer(omp_lock_kind) :: lck(ao_num)
+   integer*8                      :: i8
+   integer                        :: ii(8), jj(8), kk(8), ll(8), k2
+   integer(cache_map_size_kind)   :: n_elements_max, n_elements
+   integer(key_kind), allocatable :: keys(:)
+   double precision, allocatable  :: values(:)
+
+   !$OMP PARALLEL DEFAULT(NONE)                                      &
+       !$OMP PRIVATE(i,j,l,k1,k,integral,ii,jj,kk,ll,i8,keys,values,n_elements_max, &
+       !$OMP  n_elements,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp)&
+       !$OMP SHARED(ao_num,HF_density_matrix_ao_alpha,HF_density_matrix_ao_beta,&
+       !$OMP  ao_integrals_map, ao_bi_elec_integral_alpha, ao_bi_elec_integral_beta) 
+
+   call get_cache_map_n_elements_max(ao_integrals_map,n_elements_max)
+   allocate(keys(n_elements_max), values(n_elements_max))
+   allocate(ao_bi_elec_integral_alpha_tmp(ao_num,ao_num), &
+            ao_bi_elec_integral_beta_tmp(ao_num,ao_num))
+   ao_bi_elec_integral_alpha_tmp = 0.d0
+   ao_bi_elec_integral_beta_tmp  = 0.d0
+
+   !$OMP DO SCHEDULE(dynamic,64)
+   !DIR$ NOVECTOR
+   do i8=0_8,ao_integrals_map%map_size
+     n_elements = n_elements_max
+     call get_cache_map(ao_integrals_map,i8,keys,values,n_elements)
+     do k1=1,n_elements
+       call bielec_integrals_index_reverse(kk,ii,ll,jj,keys(k1))
+
+       do k2=1,8
+         if (kk(k2)==0) then
+           cycle
+         endif
+         i = ii(k2)
+         j = jj(k2)
+         k = kk(k2)
+         l = ll(k2)
+         integral = (HF_density_matrix_ao_alpha(k,l)+HF_density_matrix_ao_beta(k,l)) * values(k1)
+         ao_bi_elec_integral_alpha_tmp(i,j) += integral
+         ao_bi_elec_integral_beta_tmp (i,j) += integral
+         integral = values(k1)
+         ao_bi_elec_integral_alpha_tmp(l,j) -= HF_density_matrix_ao_alpha(k,i) * integral
+         ao_bi_elec_integral_beta_tmp (l,j) -= HF_density_matrix_ao_beta (k,i) * integral
+       enddo
+     enddo
+   enddo
+   !$OMP END DO NOWAIT
+   !$OMP CRITICAL
+   ao_bi_elec_integral_alpha += ao_bi_elec_integral_alpha_tmp
+   !$OMP END CRITICAL
+   !$OMP CRITICAL
+   ao_bi_elec_integral_beta  += ao_bi_elec_integral_beta_tmp
+   !$OMP END CRITICAL
+   deallocate(keys,values,ao_bi_elec_integral_alpha_tmp,ao_bi_elec_integral_beta_tmp)
+   !$OMP END PARALLEL
+
+ endif
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_mo_alpha, (mo_tot_num,mo_tot_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo(Fock_matrix_ao_alpha,size(Fock_matrix_ao_alpha,1), &
+                 Fock_matrix_mo_alpha,size(Fock_matrix_mo_alpha,1))
+END_PROVIDER
+ 
+ 
+BEGIN_PROVIDER [ double precision, Fock_matrix_mo_beta, (mo_tot_num,mo_tot_num) ]
+   implicit none
+   BEGIN_DOC
+   ! Fock matrix on the MO basis
+   END_DOC
+   call ao_to_mo(Fock_matrix_ao_beta,size(Fock_matrix_ao_beta,1), &
+                 Fock_matrix_mo_beta,size(Fock_matrix_mo_beta,1))
+END_PROVIDER
+ 
+BEGIN_PROVIDER [ double precision, HF_energy ]
+ implicit none
+ BEGIN_DOC
+ ! Hartree-Fock energy
+ END_DOC
+ HF_energy = nuclear_repulsion
+ 
+ integer                        :: i,j
+ do j=1,ao_num
+   do i=1,ao_num
+     HF_energy += 0.5d0 * (                                          &
+         (ao_mono_elec_integral(i,j) + Fock_matrix_ao_alpha(i,j) ) *  HF_density_matrix_ao_alpha(i,j) +&
+         (ao_mono_elec_integral(i,j) + Fock_matrix_ao_beta (i,j) ) *  HF_density_matrix_ao_beta (i,j) )
+   enddo
+ enddo
+  
+END_PROVIDER
+
+
+BEGIN_PROVIDER [ double precision, Fock_matrix_ao, (ao_num, ao_num) ]
+ implicit none
+ BEGIN_DOC
+ ! Fock matrix in AO basis set
+ END_DOC
+ 
+ if ( (elec_alpha_num == elec_beta_num).and. &
+      (level_shift == 0.) ) &
+ then
+   integer                        :: i,j
+   do j=1,ao_num
+     do i=1,ao_num
+       Fock_matrix_ao(i,j) = Fock_matrix_ao_alpha(i,j)
+     enddo
+   enddo
+ else
+   call mo_to_ao(Fock_matrix_mo,size(Fock_matrix_mo,1), &
+      Fock_matrix_ao,size(Fock_matrix_ao,1)) 
+ endif
+END_PROVIDER
+
+
diff --git a/src/hartree_fock/hf_density_matrix_ao.irp.f b/src/hartree_fock/hf_density_matrix_ao.irp.f
new file mode 100644
index 00000000..736d1a97
--- /dev/null
+++ b/src/hartree_fock/hf_density_matrix_ao.irp.f
@@ -0,0 +1,41 @@
+BEGIN_PROVIDER [double precision, HF_density_matrix_ao_alpha, (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! S^{-1}.P_alpha.S^{-1}
+   END_DOC
+   
+   call dgemm('N','T',ao_num,ao_num,elec_alpha_num,1.d0, &
+        mo_coef, size(mo_coef,1), &
+        mo_coef, size(mo_coef,1), 0.d0, &
+        HF_density_matrix_ao_alpha, size(HF_density_matrix_ao_alpha,1))
+
+END_PROVIDER
+
+BEGIN_PROVIDER [ double precision, HF_density_matrix_ao_beta,  (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! S^{-1}.P_beta.S^{-1}
+   END_DOC
+   
+   call dgemm('N','T',ao_num,ao_num,elec_beta_num,1.d0, &
+        mo_coef, size(mo_coef,1), &
+        mo_coef, size(mo_coef,1), 0.d0, &
+        HF_density_matrix_ao_beta, size(HF_density_matrix_ao_beta,1))
+
+END_PROVIDER
+ 
+BEGIN_PROVIDER [ double precision, HF_density_matrix_ao, (ao_num,ao_num) ]
+   implicit none
+   BEGIN_DOC
+   ! S^{-1}.P.S^{-1}  where P = C.C^t
+   END_DOC
+   ASSERT (size(HF_density_matrix_ao,1) == size(HF_density_matrix_ao_alpha,1))
+   if (elec_alpha_num== elec_beta_num) then
+     HF_density_matrix_ao = HF_density_matrix_ao_alpha + HF_density_matrix_ao_alpha
+   else
+     ASSERT (size(HF_density_matrix_ao,1) == size(HF_density_matrix_ao_beta ,1))
+     HF_density_matrix_ao = HF_density_matrix_ao_alpha + HF_density_matrix_ao_beta
+   endif
+   
+END_PROVIDER
+ 
diff --git a/src/hartree_fock/huckel.irp.f b/src/hartree_fock/huckel.irp.f
new file mode 100644
index 00000000..c9e32ad5
--- /dev/null
+++ b/src/hartree_fock/huckel.irp.f
@@ -0,0 +1,34 @@
+subroutine huckel_guess
+  implicit none
+  BEGIN_DOC
+! Build the MOs using the extended Huckel model
+  END_DOC
+  integer                        :: i,j
+  double precision               :: accu
+  double precision               :: c
+  character*(64)                 :: label
+  double precision, allocatable  :: A(:,:)
+  label = "Guess"
+  c = 0.5d0 * 1.75d0
+
+  allocate (A(ao_num, ao_num))
+  A = 0.d0
+  do j=1,ao_num
+    do i=1,ao_num
+      A(i,j) = c * ao_overlap(i,j) * (ao_mono_elec_integral_diag(i) + ao_mono_elec_integral_diag(j))
+    enddo
+    A(j,j) = ao_mono_elec_integral_diag(j) + ao_bi_elec_integral_alpha(j,j)
+  enddo
+
+  Fock_matrix_ao_alpha(1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+  Fock_matrix_ao_beta (1:ao_num,1:ao_num) = A(1:ao_num,1:ao_num)
+
+!  TOUCH mo_coef
+
+  TOUCH Fock_matrix_ao_alpha Fock_matrix_ao_beta
+  mo_coef = eigenvectors_fock_matrix_mo
+  SOFT_TOUCH mo_coef
+  call save_mos
+  deallocate(A)
+
+end
diff --git a/src/hartree_fock/roothaan_hall_scf.irp.f b/src/hartree_fock/roothaan_hall_scf.irp.f
new file mode 100644
index 00000000..e860496f
--- /dev/null
+++ b/src/hartree_fock/roothaan_hall_scf.irp.f
@@ -0,0 +1,289 @@
+subroutine Roothaan_Hall_SCF
+
+BEGIN_DOC
+! Roothaan-Hall algorithm for SCF Hartree-Fock calculation
+END_DOC
+
+  implicit none
+
+  double precision               :: energy_SCF,energy_SCF_previous,Delta_energy_SCF
+  double precision               :: max_error_DIIS,max_error_DIIS_alpha,max_error_DIIS_beta
+  double precision, allocatable  :: Fock_matrix_DIIS(:,:,:),error_matrix_DIIS(:,:,:)
+
+  integer                        :: iteration_SCF,dim_DIIS,index_dim_DIIS
+ 
+  integer                        :: i,j
+  double precision, allocatable :: mo_coef_save(:,:)
+
+  PROVIDE ao_md5 mo_occ level_shift 
+ 
+  allocate(mo_coef_save(ao_num,mo_tot_num),                          &
+      Fock_matrix_DIIS (ao_num,ao_num,max_dim_DIIS),                 &
+      error_matrix_DIIS(ao_num,ao_num,max_dim_DIIS)                  &
+      )
+
+  call write_time(6)
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================'
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)')  &
+    '  N ', 'Energy  ', 'Energy diff  ', 'DIIS error  '
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)')  &
+    '====','================','================','================'
+
+! Initialize energies and density matrices
+
+  energy_SCF_previous = HF_energy
+  Delta_energy_SCF    = 1.d0
+  iteration_SCF       = 0
+  dim_DIIS            = 0
+  max_error_DIIS      = 1.d0
+
+
+!
+! Start of main SCF loop
+!
+  do while(( (max_error_DIIS > threshold_DIIS_nonzero).or.(dabs(Delta_energy_SCF) > thresh_SCF) ) .and. (iteration_SCF < n_it_SCF_max))
+
+! Increment cycle number
+
+    iteration_SCF += 1
+
+! Current size of the DIIS space
+
+    dim_DIIS = min(dim_DIIS+1,max_dim_DIIS)
+ 
+    if (scf_algorithm == 'DIIS') then
+      
+      ! Store Fock and error matrices at each iteration
+      do j=1,ao_num
+        do i=1,ao_num
+          index_dim_DIIS = mod(dim_DIIS-1,max_dim_DIIS)+1
+          Fock_matrix_DIIS (i,j,index_dim_DIIS) = Fock_matrix_AO(i,j)
+          error_matrix_DIIS(i,j,index_dim_DIIS) = FPS_SPF_matrix_AO(i,j)
+        enddo
+      enddo
+      
+      ! Compute the extrapolated Fock matrix
+
+      call extrapolate_Fock_matrix(                                    &
+          error_matrix_DIIS,Fock_matrix_DIIS,                          &
+          Fock_matrix_AO,size(Fock_matrix_AO,1),                       &
+          iteration_SCF,dim_DIIS                                       &
+          )
+
+      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
+
+    endif
+
+    MO_coef = eigenvectors_Fock_matrix_MO
+
+    TOUCH MO_coef
+
+!   Calculate error vectors
+
+    max_error_DIIS = maxval(Abs(FPS_SPF_Matrix_MO))
+
+!   SCF energy
+
+    energy_SCF = HF_energy
+    Delta_Energy_SCF = energy_SCF - energy_SCF_previous
+    if ( (SCF_algorithm == 'DIIS').and.(Delta_Energy_SCF > 0.d0) ) then
+      Fock_matrix_AO(1:ao_num,1:ao_num) = Fock_matrix_DIIS (1:ao_num,1:ao_num,index_dim_DIIS)
+      Fock_matrix_AO_alpha = Fock_matrix_AO*0.5d0
+      Fock_matrix_AO_beta  = Fock_matrix_AO*0.5d0
+      TOUCH Fock_matrix_AO_alpha Fock_matrix_AO_beta
+    endif
+
+    double precision :: level_shift_save
+    level_shift_save = level_shift
+    mo_coef_save(1:ao_num,1:mo_tot_num) = mo_coef(1:ao_num,1:mo_tot_num)
+    do while (Delta_Energy_SCF > 0.d0)
+      mo_coef(1:ao_num,1:mo_tot_num) = mo_coef_save
+      TOUCH mo_coef
+      level_shift = level_shift + 1.0d0
+      mo_coef(1:ao_num,1:mo_tot_num) = eigenvectors_Fock_matrix_MO(1:ao_num,1:mo_tot_num)
+      TOUCH mo_coef level_shift
+      Delta_Energy_SCF = HF_energy - energy_SCF_previous
+      energy_SCF = HF_energy
+      if (level_shift-level_shift_save > 50.d0) then
+        level_shift = level_shift_save
+        SOFT_TOUCH level_shift
+        exit
+      endif
+      dim_DIIS=0
+    enddo
+    level_shift = level_shift * 0.75d0
+    SOFT_TOUCH level_shift
+    energy_SCF_previous = energy_SCF
+
+!   Print results at the end of each iteration
+
+    write(6,'(I4, 1X, F16.10, 1X, F16.10, 1X, F16.10, 1X, I3)')  &
+      iteration_SCF, energy_SCF, Delta_energy_SCF, max_error_DIIS, dim_DIIS
+
+    if (Delta_energy_SCF < 0.d0) then
+      call save_mos
+    endif
+
+  enddo
+
+!
+! End of Main SCF loop
+!
+
+  write(6,'(A4, 1X, A16, 1X, A16, 1X, A16)') &
+    '====','================','================','================'
+  write(6,*)
+  
+  if(.not.no_oa_or_av_opt)then
+   call mo_as_eigvectors_of_mo_matrix(Fock_matrix_mo,size(Fock_matrix_mo,1),size(Fock_matrix_mo,2),mo_label,1,.true.)
+  endif
+
+  call write_double(6, Energy_SCF, 'Hartree-Fock energy')
+  call ezfio_set_hartree_fock_energy(Energy_SCF)
+
+  call write_time(6)
+
+end
+
+subroutine extrapolate_Fock_matrix(      &   
+  error_matrix_DIIS,Fock_matrix_DIIS,    &
+  Fock_matrix_AO_,size_Fock_matrix_AO,   &
+  iteration_SCF,dim_DIIS                 &
+)
+
+BEGIN_DOC
+! Compute the extrapolated Fock matrix using the DIIS procedure
+END_DOC
+
+  implicit none
+
+  double precision,intent(in)   :: Fock_matrix_DIIS(ao_num,ao_num,*),error_matrix_DIIS(ao_num,ao_num,*)
+  integer,intent(in)            :: iteration_SCF, size_Fock_matrix_AO
+  double precision,intent(inout):: Fock_matrix_AO_(size_Fock_matrix_AO,ao_num)
+  integer,intent(inout)         :: dim_DIIS
+
+  double precision,allocatable  :: B_matrix_DIIS(:,:),X_vector_DIIS(:)
+  double precision,allocatable  :: C_vector_DIIS(:)
+
+  double precision,allocatable  :: scratch(:,:)
+  integer                       :: i,j,k,i_DIIS,j_DIIS
+
+  allocate(                               &
+    B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1), &
+    X_vector_DIIS(dim_DIIS+1),            &
+    C_vector_DIIS(dim_DIIS+1),            &
+    scratch(ao_num,ao_num)                &
+  )
+
+! Compute the matrices B and X
+  do j=1,dim_DIIS
+    do i=1,dim_DIIS
+
+      j_DIIS = mod(iteration_SCF-j,max_dim_DIIS)+1
+      i_DIIS = mod(iteration_SCF-i,max_dim_DIIS)+1
+
+! Compute product of two errors vectors
+
+      call dgemm('N','N',ao_num,ao_num,ao_num,                      &
+           1.d0,                                                    &
+           error_matrix_DIIS(1,1,i_DIIS),size(error_matrix_DIIS,1), &
+           error_matrix_DIIS(1,1,j_DIIS),size(error_matrix_DIIS,1), &
+           0.d0,                                                    &
+           scratch,size(scratch,1))                                                  
+
+! Compute Trace
+
+      B_matrix_DIIS(i,j) = 0.d0
+      do k=1,ao_num
+        B_matrix_DIIS(i,j) = B_matrix_DIIS(i,j) + scratch(k,k)
+      enddo
+    enddo
+  enddo
+
+! Pad B matrix and build the X matrix
+
+  do i=1,dim_DIIS
+    B_matrix_DIIS(i,dim_DIIS+1) = -1.d0
+    B_matrix_DIIS(dim_DIIS+1,i) = -1.d0
+    C_vector_DIIS(i) = 0.d0
+  enddo
+  B_matrix_DIIS(dim_DIIS+1,dim_DIIS+1) = 0.d0
+  C_vector_DIIS(dim_DIIS+1) = -1.d0
+
+! Solve the linear system C = B.X
+
+  integer              :: info
+  integer,allocatable  :: ipiv(:)
+
+  allocate(          &
+    ipiv(dim_DIIS+1) &
+  )
+
+  double precision, allocatable :: AF(:,:) 
+  allocate (AF(dim_DIIS+1,dim_DIIS+1))
+  double precision :: rcond, ferr, berr
+  integer :: iwork(dim_DIIS+1), lwork
+  
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch,-1,                          &
+    iwork,                               &
+    info                                 &
+  )
+  lwork = int(scratch(1,1))
+  deallocate(scratch)
+  allocate(scratch(lwork,1))
+
+  call dsysvx('N','U',dim_DIIS+1,1,      &
+    B_matrix_DIIS,size(B_matrix_DIIS,1), &
+    AF, size(AF,1),                      &
+    ipiv,                                &
+    C_vector_DIIS,size(C_vector_DIIS,1), &
+    X_vector_DIIS,size(X_vector_DIIS,1), &
+    rcond,                               &
+    ferr,                                &
+    berr,                                &
+    scratch,size(scratch),               &
+    iwork,                               &
+    info                                 &
+  )
+
+ if(info < 0) then
+   stop 'bug in DIIS'
+ endif
+
+ if (rcond > 1.d-12) then
+   
+  ! Compute extrapolated Fock matrix
+
+
+      !$OMP PARALLEL DO PRIVATE(i,j,k) DEFAULT(SHARED)
+      do j=1,ao_num
+        do i=1,ao_num
+          Fock_matrix_AO_(i,j) = 0.d0
+        enddo
+        do k=1,dim_DIIS
+          do i=1,ao_num
+            Fock_matrix_AO_(i,j) = Fock_matrix_AO_(i,j) +            &
+                X_vector_DIIS(k)*Fock_matrix_DIIS(i,j,dim_DIIS-k+1)
+          enddo
+        enddo
+      enddo
+      !$OMP END PARALLEL DO
+
+  else
+    dim_DIIS = 0
+  endif
+
+end
diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f
new file mode 100644
index 00000000..a04fcc31
--- /dev/null
+++ b/src/hartree_fock/scf.irp.f
@@ -0,0 +1,60 @@
+program scf
+  BEGIN_DOC
+! Produce `Hartree_Fock` MO orbital 
+! output: mo_basis.mo_tot_num mo_basis.mo_label mo_basis.ao_md5 mo_basis.mo_coef mo_basis.mo_occ
+! output: hartree_fock.energy
+! optional: mo_basis.mo_coef
+  END_DOC
+  call create_guess
+  call orthonormalize_mos
+  call run
+end
+
+subroutine create_guess
+  implicit none
+  BEGIN_DOC
+!   Create a MO guess if no MOs are present in the EZFIO directory
+  END_DOC
+  logical                        :: exists
+  PROVIDE ezfio_filename
+  call ezfio_has_mo_basis_mo_coef(exists)
+  if (.not.exists) then
+    if (mo_guess_type == "HCore") then
+      mo_coef = ao_ortho_lowdin_coef
+      TOUCH mo_coef
+      mo_label = 'Guess'
+      call mo_as_eigvectors_of_mo_matrix(mo_mono_elec_integral,size(mo_mono_elec_integral,1),size(mo_mono_elec_integral,2),mo_label,1,.false.)
+      SOFT_TOUCH mo_coef mo_label
+    else if (mo_guess_type == "Huckel") then
+      call huckel_guess
+    else
+      print *,  'Unrecognized MO guess type : '//mo_guess_type
+      stop 1
+    endif
+  endif
+end
+
+subroutine run
+
+  BEGIN_DOC
+!   Run SCF calculation
+  END_DOC
+
+  use bitmasks
+  implicit none
+
+  double precision               :: SCF_energy_before,SCF_energy_after,diag_H_mat_elem
+  double precision               :: EHF
+  integer                        :: i_it, i, j, k
+   
+  EHF = HF_energy 
+
+  mo_label = "Canonical"
+
+! Choose SCF algorithm
+
+  call Roothaan_Hall_SCF
+  
+end
+
+
diff --git a/src/mrpt_utils/H_apply.irp.f b/src/mrpt_utils/h_apply.irp.f
similarity index 100%
rename from src/mrpt_utils/H_apply.irp.f
rename to src/mrpt_utils/h_apply.irp.f
diff --git a/src/perturbation/perturbation.irp.f b/src/perturbation/perturbation.irp.f
index f7d1f478..040f3026 100644
--- a/src/perturbation/perturbation.irp.f
+++ b/src/perturbation/perturbation.irp.f
@@ -2,7 +2,7 @@ BEGIN_SHELL [ /usr/bin/env python2 ]
 from perturbation import perturbations
 import os
 
-filename = os.environ["QP_ROOT"]+"/src/Perturbation/perturbation.template.f"
+filename = os.environ["QP_ROOT"]+"/src/perturbation/perturbation.template.f"
 file = open(filename,'r')
 template = file.read()
 file.close()
diff --git a/src/utils/LinearAlgebra.irp.f b/src/utils/linear_algebra.irp.f
similarity index 100%
rename from src/utils/LinearAlgebra.irp.f
rename to src/utils/linear_algebra.irp.f