Merging with master

2024-12-30 23:25:39 +01:00 · 2022-11-29 11:50:25 +01:00 · 2022-11-29 11:50:25 +01:00 · 0f1f5a8cdd
commit 0f1f5a8cdd
parent 7c52335c85
54 changed files with 481 additions and 690 deletions
--- a/RELEASE_NOTES.org
+++ b/RELEASE_NOTES.org
@ -4,90 +4,100 @@

 ** Changes

-  - Python3 replaces Python2
-  - Travis CI uses 3 jobs
-  - Moved Travis scripts into ~travis~ directory
-  - IRPF90 and EZFIO are now git submodules
-  - Now basis sets should be downloaded from basis-set-exchange website
-  - Added ~bse~ in the installable tools
-  - Documentation in ~src/README.rst~
-  - Added two-body reduced density matrix
-  - Added basis set correction
-  - Added CAS-based on-top density functional
-  - Improve PT2 computation for excited-states: Mostly 2x2
-    diagonalization, and some (n+1)x(n+1) diagonalizations
-  - Error bars for stochastic variance and norm of the perturbed wave function
-  - Improve PT2-matching for excited-states
-  - Compute the overlap of PT2 excited states
-  - Renamed SOP into CFG
-  - Improved parallelism in PT2 by splitting tasks
-  - Use max in multi-state PT2 instead of sum for the selection weight
-  - Added seniority
-  - Added excitation_max
-  - More tasks for distribueted Davidson
-  - Random guess vectors in Davidson have zeros to preserve symmetry
-  - Disk-based Davidson when too much memory is required
-  - Fixed bug in DIIS
-  - Fixed bug in molden (Au -> Angs)
-  - Fixed bug with non-contiguous MOs in active space and deleter MOs
-  - Complete network-free installation
-  - Fixed bug in selection when computing full PT2
-  - Updated version of f77-zmq
+   - Python3 replaces Python2
+   - Travis CI uses 3 jobs
+   - Moved Travis scripts into ~travis~ directory
+   - IRPF90 and EZFIO are now git submodules
+   - Now basis sets should be downloaded from basis-set-exchange website
+   - Added ~bse~ in the installable tools
+   - Documentation in ~src/README.rst~
+   - Added two-body reduced density matrix
+   - Added basis set correction
+   - Added GTOs with complex exponent
+   - Added many types of integrals
+   - Added CAS-based on-top density functional
+   - Improve PT2 computation for excited-states: Mostly 2x2
+     diagonalization, and some (n+1)x(n+1) diagonalizations
+   - Error bars for stochastic variance and norm of the perturbed wave function
+   - Improve PT2-matching for excited-states
+   - Compute the overlap of PT2 excited states
+   - Renamed SOP into CFG
+   - Improved parallelism in PT2 by splitting tasks
+   - Use max in multi-state PT2 instead of sum for the selection weight
+   - Added seniority
+   - Added excitation_max
+   - More tasks for distribueted Davidson
+   - Random guess vectors in Davidson have zeros to preserve symmetry
+   - Disk-based Davidson when too much memory is required
+   - Fixed bug in DIIS
+   - Fixed bug in molden (Au -> Angs)
+   - Fixed bug with non-contiguous MOs in active space and deleter MOs
+   - Complete network-free installation
+   - Fixed bug in selection when computing full PT2
+   - Updated version of f77-zmq
+   - Added transcorrelated SCF
+   - Added transcorrelated CIPSI
+   - Started to introduce shells in AOs
+   - Added ECMD UEG functional
+   - Introduced DFT-based basis set correction
+   - General davidson algorithm

-*** User interface
+** User interface

-    - Added ~qp_basis~ script to install a basis set from the ~bse~
-      command-line tool
-    - Introduced ~n_det_qp_edit~, ~psi_det_qp_edit~, and
- ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with
-      large wave functions
-    - Removed ~etc/ninja.rc~
-    - Added flag to specify if the AOs are normalized
-    - Added flag to specify if the primitive Gaussians are normalized
-    - Added ~lin_dep_cutoff~, the cutoff for linear dependencies
-    - Davidson convergence threshold can be adapted from PT2
-    - In ~density_for_dft~, ~no_core_density~ is now a logical
-    - Default for ~weight_selection~ has changed from 2 to 1
-    - Nullify_small_elements in matrices to keep symmetry
-    - Default of density functional changed from LDA to PBE
-    - Added ~no_vvvv_integrals~ flag
-    - Added ~pt2_min_parallel_tasks~ to control parallelism in PT2
-    - Added ~print_energy~
-    - Added ~print_hamiltonian~
-    - Added input for two body RDM
-    - Added keyword ~save_wf_after_selection~
-    - Added a ~restore_symm~ flag to enforce the restoration of
-      symmetry in matrices
-    - qp_export_as_tgz exports also plugin codes
-    - Added a basis module containing basis set information
-    - Added qp_run truncate_wf
+   - Added ~qp_basis~ script to install a basis set from the ~bse~
+     command-line tool
+   - Introduced ~n_det_qp_edit~, ~psi_det_qp_edit~, and
+ ~psi_coef_qp_edit~ to accelerate the opening of qp_edit with
+     large wave functions
+   - Removed ~etc/ninja.rc~
+   - Added flag to specify if the AOs are normalized
+   - Added flag to specify if the primitive Gaussians are normalized
+   - Added ~lin_dep_cutoff~, the cutoff for linear dependencies
+   - Davidson convergence threshold can be adapted from PT2
+   - In ~density_for_dft~, ~no_core_density~ is now a logical
+   - Default for ~weight_selection~ has changed from 2 to 1
+   - Nullify_small_elements in matrices to keep symmetry
+   - Default of density functional changed from LDA to PBE
+   - Added ~no_vvvv_integrals~ flag
+   - Added ~pt2_min_parallel_tasks~ to control parallelism in PT2
+   - Added ~print_energy~
+   - Added ~print_hamiltonian~
+   - Added input for two body RDM
+   - Added keyword ~save_wf_after_selection~
+   - Added a ~restore_symm~ flag to enforce the restoration of
+     symmetry in matrices
+   - qp_export_as_tgz exports also plugin codes
+   - Added a basis module containing basis set information
+   - Added qp_run truncate_wf

-*** Code
+** Code

-    - Many bug fixes
-    - Changed electron-nucleus from ~e_n~ to ~n_e~ in names of variables
-    - Changed ~occ_pattern~ to ~configuration~
-    - Replaced ~List.map~ by a tail-recursive version ~Qputils.list_map~
-    - Added possible imaginary part in OCaml MO coefficients
-    - Added ~qp_clean_source_files.sh~ to remove non-ascii characters
-    - Added flag ~is_periodic~ for periodic systems
-    - Possibilities to handle complex integrals and complex MOs
-    - Moved pseuodpotential integrals out of ~ao_one_e_integrals~
-    - Removed Schwarz test and added logical functions
- ~ao_two_e_integral_zero~ and ~ao_one_e_integral_zero~
-    - Introduced type for ~pt2_data~
-    - Banned excitations are used with far apart localized MOs
-    - S_z2_Sz is now included in S2
-    - S^2 in single precision
-    - Added Shank function
-    - Added utilities for periodic calculations
-    - Added ~V_ne_psi_energy~
-    - Added ~h_core_guess~ routine
-    - Fixed Laplacians in real space (indices)
-    - Added LIB file to add extra libs in plugin
-    - Using Intel IPP for sorting when using Intel compiler
-    - Removed parallelism in sorting
-    - Compute banned_excitations from exchange integrals to accelerate with local MOs
+   - Many bug fixes
+   - Changed electron-nucleus from ~e_n~ to ~n_e~ in names of variables
+   - Changed ~occ_pattern~ to ~configuration~
+   - Replaced ~List.map~ by a tail-recursive version ~Qputils.list_map~
+   - Added possible imaginary part in OCaml MO coefficients
+   - Added ~qp_clean_source_files.sh~ to remove non-ascii characters
+   - Added flag ~is_periodic~ for periodic systems
+   - Possibilities to handle complex integrals and complex MOs
+   - Moved pseuodpotential integrals out of ~ao_one_e_integrals~
+   - Removed Schwarz test and added logical functions
+ ~ao_two_e_integral_zero~ and ~ao_one_e_integral_zero~
+   - Introduced type for ~pt2_data~
+   - Banned excitations are used with far apart localized MOs
+   - S_z2_Sz is now included in S2
+   - S^2 in single precision
+   - Added Shank function
+   - Added utilities for periodic calculations
+   - Added ~V_ne_psi_energy~
+   - Added ~h_core_guess~ routine
+   - Fixed Laplacians in real space (indices)
+   - Added LIB file to add extra libs in plugin
+   - Using Intel IPP for sorting when using Intel compiler
+   - Removed parallelism in sorting
+   - Compute banned_excitations from exchange integrals to accelerate with local MOs
+   - Updated OCaml for 4.13
+  



--- a/config/ifort_2021_avx.cfg
+++ b/config/ifort_2021_avx.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 

--- a/config/ifort_2021_avx_mpi.cfg
+++ b/config/ifort_2021_avx_mpi.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL 

--- a/config/ifort_2021_mpi_rome.cfg
+++ b/config/ifort_2021_mpi_rome.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 

--- a/config/ifort_2021_rome.cfg
+++ b/config/ifort_2021_rome.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 

--- a/config/ifort_2021_sse4.cfg
+++ b/config/ifort_2021_sse4.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DINTEL 

--- a/config/ifort_2021_sse4_mpi.cfg
+++ b/config/ifort_2021_sse4_mpi.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : mpiifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=32 -DMPI -DINTEL 

--- a/config/ifort_2021_xHost.cfg
+++ b/config/ifort_2021_xHost.cfg
@ -7,7 +7,7 @@
 #
 [COMMON]
 FC           : ifort -fpic
-LAPACK_LIB   : -mkl=parallel
+LAPACK_LIB   : -qmkl=parallel
 IRPF90       : irpf90
 IRPF90_FLAGS : --ninja --align=64 -DINTEL 

--- a/2
+++ b/2
@ -246,7 +246,7 @@ EOF
            execute << EOF
              cd "\${QP_ROOT}"/external
              tar --gunzip --extract --file qp2-dependencies/bse-v0.8.11.tar.gz
-              pip install -e basis_set_exchange-*
+              python3 -m pip install -e basis_set_exchange-*
 EOF

    elif [[ ${PACKAGE} = zlib ]] ; then
--- a/etc/qp.rc
+++ b/etc/qp.rc
@ -80,6 +80,8 @@ function qp()
      if [[ -d $NAME ]] ; then
        [[ -d $EZFIO_FILE ]] && ezfio unset_file
        ezfio set_file $NAME
+      else
+        qp_create_ezfio -h | more
      fi
      unset _ARGS
      ;;
--- a/src/ao_basis/EZFIO.cfg
+++ b/src/ao_basis/EZFIO.cfg
@ -67,4 +67,3 @@ doc: Use normalized primitive functions
 interface: ezfio, provider
 default: true

-
--- a/src/ao_basis/spherical_to_cartesian.irp.f
+++ b/src/ao_basis/spherical_to_cartesian.irp.f
@ -1,7 +1,7 @@
 ! Spherical to cartesian transformation matrix obtained with
 ! Horton (http://theochem.github.com/horton/, 2015)

-! First index is the index of the carteisan AO, obtained by ao_power_index
+! First index is the index of the cartesian AO, obtained by ao_power_index
 ! Second index is the index of the spherical AO

 BEGIN_PROVIDER [ double precision, cart_to_sphe_0, (1,1) ]
--- a/src/ao_one_e_ints/pot_ao_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_ints.irp.f
@ -288,8 +288,6 @@ double precision function NAI_pol_mult(A_center,B_center,power_A,power_B,alpha,b
  ! sum of integrals of type : int {t,[0,1]}  exp-(rho.(P-Q)^2 * t^2) * t^i
  do i =0 ,n_pt_out,2
    accu +=  d(i) * rint(i/2,const)
-
-!    print *, i/2, const, d(i), rint(shiftr(i, 1), const)
  enddo
  NAI_pol_mult = accu * coeff

--- a/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
+++ b/src/ao_one_e_ints/pot_ao_pseudo_ints.irp.f
@ -42,7 +42,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integrals_local, (ao_num,ao_num)]

  double precision               :: wall_1, wall_2, wall_0
  integer                        :: thread_num
-  integer                        :: omp_get_thread_num
+  integer, external              :: omp_get_thread_num
  double precision               :: c
  double precision               :: Z

@ -169,7 +169,7 @@ BEGIN_PROVIDER [ double precision, ao_pseudo_integrals_local, (ao_num,ao_num)]
  integer                        :: power_A(3),power_B(3)
  integer                        :: i,j,k,l,m
  double precision               :: Vloc, Vpseudo
-  integer                        :: omp_get_thread_num
+  integer, external              :: omp_get_thread_num

  double precision               :: wall_1, wall_2, wall_0
  integer                        :: thread_num
--- a/src/ao_one_e_ints/pseudopot.f90
+++ b/src/ao_one_e_ints/pseudopot.f90
@ -1237,7 +1237,7 @@ end
      integer nptsgridmax,nptsgrid,ik
      double precision p,q,r,s
      parameter(nptsgridmax=50)
-      double precision :: coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
+      double precision coefs_pseudo(nptsgridmax),ptsgrid(nptsgridmax,3)
      common/pseudos/coefs_pseudo,ptsgrid

      p=1.d0/dsqrt(2.d0)
--- a/src/ao_two_e_ints/test_cosgtos_1e.irp.f
+++ b/src/ao_two_e_ints/test_cosgtos_1e.irp.f
@ -1,191 +0,0 @@
-
-! ---
-
-program test_cosgtos
-
-  implicit none
-  integer :: i, j
-
-  call init_expo()
-
-!  call test_coef()
-  call test_1e_kin()
-  call test_1e_coul()
-
-  i = 1
-  j = 1
-!  call test_1e_coul_real(i, j)
-!  call test_1e_coul_cpx (i, j)
-
-end
-
-! ---
-
-subroutine init_expo()
-
-  implicit none
-
-  integer                       :: i, j
-  double precision, allocatable :: expo_im(:,:)
-
-  allocate(expo_im(ao_num, ao_prim_num_max))
-
-  do j = 1, ao_prim_num_max
-    do i = 1, ao_num
-      ao_expoim_cosgtos(i,j) = 0.d0
-    enddo
-  enddo
-
-  call ezfio_set_cosgtos_ao_int_ao_expoim_cosgtos(expo_im)
-
-  deallocate(expo_im)
-
-end subroutine init_expo
-
-! ---
-
-subroutine test_coef()
-
-  implicit none
-
-  integer          :: i, j
-  double precision :: coef, coef_gtos, coef_cosgtos 
-  double precision :: delta, accu_abs
-
-  print*, ' check coefs' 
-
-  accu_abs = 0.d0
-  accu_abs = 0.d0
-  do i = 1, ao_num
-    do j = 1, ao_prim_num(i)
-
-      coef         = ao_coef(i,j)
-      coef_gtos    = 1.d0 * ao_coef_normalized_ordered_transp(j,i)
-      coef_cosgtos = 2.d0 * ao_coef_norm_ord_transp_cosgtos  (j,i)
-
-      delta = dabs(coef_gtos - coef_cosgtos)
-      accu_abs += delta
-
-      if(delta .gt. 1.d-10) then
-        print*, ' problem on: '
-        print*, i, j
-        print*, coef_gtos, coef_cosgtos, delta
-        print*, coef 
-        stop
-      endif
-
-    enddo
-  enddo
-
-  print*, 'accu_abs = ', accu_abs 
-
-end subroutine test_coef
-
-! ---
-
-subroutine test_1e_kin()
-
-  implicit none
-
-  integer          :: i, j
-  double precision :: integral_gtos, integral_cosgtos
-  double precision :: delta, accu_abs
-
-  print*, ' check kin 1e integrals' 
-
-  accu_abs = 0.d0
-  accu_abs = 0.d0
-
-  do j = 1, ao_num
-    do i = 1, ao_num
-
-      integral_gtos    = ao_kinetic_integrals        (i,j) 
-      integral_cosgtos = ao_kinetic_integrals_cosgtos(i,j) 
-
-
-      delta = dabs(integral_gtos - integral_cosgtos)
-      accu_abs += delta
-
-      if(delta .gt. 1.d-7) then
-        print*, ' problem on: '
-        print*, i, j
-        print*, integral_gtos, integral_cosgtos, delta
-        !stop
-      endif
-
-    enddo
-  enddo
-
- print*,'accu_abs = ', accu_abs
-
-end subroutine test_1e_kin
-
-! ---
-
-subroutine test_1e_coul()
-
-  implicit none
-
-  integer          :: i, j
-  double precision :: integral_gtos, integral_cosgtos
-  double precision :: delta, accu_abs
-
-  print*, ' check Coulomb 1e integrals' 
-
-  accu_abs = 0.d0
-  accu_abs = 0.d0
-
-  do j = 1, ao_num
-    do i = 1, ao_num
-
-      integral_gtos    = ao_integrals_n_e        (i,j) 
-      integral_cosgtos = ao_integrals_n_e_cosgtos(i,j) 
-
-      delta = dabs(integral_gtos - integral_cosgtos)
-      accu_abs += delta
-
-      if(delta .gt. 1.d-7) then
-        print*, ' problem on: '
-        print*, i, j
-        print*, integral_gtos, integral_cosgtos, delta
-        !stop
-      endif
-
-    enddo
-  enddo
-
- print*,'accu_abs = ', accu_abs
-
-end subroutine test_1e_coul
-
-! ---
-
-subroutine test_1e_coul_cpx(i, j)
-
-  implicit none
-
-  integer, intent(in) :: i, j
-  double precision    :: integral
-
-  integral = ao_integrals_n_e_cosgtos(i,j) 
-
-  print*, ' cpx Coulomb 1e integrals', integral
-
-end subroutine test_1e_coul_cpx
-
-! ---
-
-subroutine test_1e_coul_real(i, j)
-
-  implicit none
-
-  integer, intent(in) :: i, j
-  double precision    :: integral
-
-  integral = ao_integrals_n_e(i,j) 
-
-  print*, ' real Coulomb 1e integrals', integral
-
-end subroutine test_1e_coul_real
-
-! ---
--- a/src/ao_two_e_ints/test_cosgtos_2e.irp.f
+++ b/src/ao_two_e_ints/test_cosgtos_2e.irp.f
@ -1,165 +0,0 @@
-
-! ---
-
-program test_cosgtos
-
-  implicit none
-  integer :: iao, jao, kao, lao
-
-  call init_expo()
-
-!  call test_coef()
-  call test_2e()
-
-  iao = 1
-  jao = 1
-  kao = 1
-  lao = 21
-!  call test_2e_cpx (iao, jao, kao, lao)
-!  call test_2e_real(iao, jao, kao, lao)
-
-end
-
-! ---
-
-subroutine init_expo()
-
-  implicit none
-
-  integer                       :: i, j
-  double precision, allocatable :: expo_im(:,:)
-
-  allocate(expo_im(ao_num, ao_prim_num_max))
-
-  do j = 1, ao_prim_num_max
-    do i = 1, ao_num
-      ao_expoim_cosgtos(i,j) = 0.d0
-    enddo
-  enddo
-
-  call ezfio_set_cosgtos_ao_int_ao_expoim_cosgtos(expo_im)
-
-  deallocate(expo_im)
-
-end subroutine init_expo
-
-! ---
-
-subroutine test_coef()
-
-  implicit none
-
-  integer          :: i, j
-  double precision :: coef, coef_gtos, coef_cosgtos 
-  double precision :: delta, accu_abs
-
-  print*, ' check coefs' 
-
-  accu_abs = 0.d0
-  accu_abs = 0.d0
-  do i = 1, ao_num
-    do j = 1, ao_prim_num(i)
-
-      coef         = ao_coef(i,j)
-      coef_gtos    = 1.d0 * ao_coef_normalized_ordered_transp(j,i)
-      coef_cosgtos = 2.d0 * ao_coef_norm_ord_transp_cosgtos  (j,i)
-
-      delta = dabs(coef_gtos - coef_cosgtos)
-      accu_abs += delta
-
-      if(delta .gt. 1.d-10) then
-        print*, ' problem on: '
-        print*, i, j
-        print*, coef_gtos, coef_cosgtos, delta
-        print*, coef 
-        stop
-      endif
-
-    enddo
-  enddo
-
-  print*, 'accu_abs = ', accu_abs 
-
-end subroutine test_coef
-
-
-! ---
-
-subroutine test_2e()
-
-  implicit none
-
-  integer          :: iao, jao, kao, lao
-  double precision :: integral_gtos, integral_cosgtos
-  double precision :: delta, accu_abs
-
-  double precision :: ao_two_e_integral, ao_two_e_integral_cosgtos
-
-  print*, ' check integrals' 
-
-  accu_abs = 0.d0
-  accu_abs = 0.d0
-
- ! iao = 1
- ! jao = 1
- ! kao = 1
- ! lao = 24
-
-  do iao = 1, ao_num ! r1
-    do jao = 1, ao_num ! r2
-      do kao = 1, ao_num ! r1
-        do lao = 1, ao_num ! r2
-
-          integral_gtos    = ao_two_e_integral        (iao, kao, jao, lao)
-          integral_cosgtos = ao_two_e_integral_cosgtos(iao, kao, jao, lao)
-
-          delta = dabs(integral_gtos - integral_cosgtos)
-          accu_abs += delta
-
-          if(delta .gt. 1.d-7) then
-            print*, ' problem on: '
-            print*, iao, jao, kao, lao
-            print*, integral_gtos, integral_cosgtos, delta
-            !stop
-          endif
-
-        enddo
-      enddo
-    enddo
-  enddo
-
- print*,'accu_abs = ', accu_abs
-
-end subroutine test_2e
-
-! ---
-
-subroutine test_2e_cpx(iao, jao, kao, lao)
-
-  implicit none
-  integer, intent(in) :: iao, jao, kao, lao
-  double precision    :: integral
-  double precision    :: ao_two_e_integral_cosgtos
-
-  integral = ao_two_e_integral_cosgtos(iao, kao, jao, lao)
-  print *, ' cosgtos: ', integral
-
-end subroutine test_2e_cpx
-
-! ---
-
-subroutine test_2e_real(iao, jao, kao, lao)
-
-  implicit none
-  integer, intent(in) :: iao, jao, kao, lao
-  double precision    :: integral
-  double precision    :: ao_two_e_integral
-
-  integral = ao_two_e_integral(iao, kao, jao, lao)
-  print *, ' gtos: ', integral
-
-end subroutine test_2e_real
-
-! ---
-
-
--- a/src/ao_two_e_ints/two_e_integrals.irp.f
+++ b/src/ao_two_e_ints/two_e_integrals.irp.f
@ -603,10 +603,7 @@ double precision function general_primitive_integral(dim,            &
  !DIR$ FORCEINLINE
  call multiply_poly(d_poly ,n_pt_tmp ,Iz_pol,n_Iz,d1,n_pt_out)
  double precision               :: rint_sum
-
  accu = accu + rint_sum(n_pt_out,const,d1)
-!  print *, n_pt_out, d1(0:n_pt_out)
-!  print *, accu

  general_primitive_integral = fact_p * fact_q * accu *pi_5_2*p_inv*q_inv/dsqrt(p+q)
 end
@ -871,15 +868,6 @@ subroutine give_polynom_mult_center_x(P_center,Q_center,a_x,d_x,p,q,n_pt_in,pq_i
  !DIR$ FORCEINLINE
  call I_x1_pol_mult(a_x,d_x,B10,B01,B00,C00,D00,d,n_pt1,n_pt_in)
  n_pt_out = n_pt1
-
-!  print *, ' '
-!  print *, a_x, d_x
-!  print *, B10, B01, B00, C00, D00
-!  print *, n_pt1, d(0:n_pt1)
-!  print *, ' '
-
-
-
  if(n_pt1<0)then
    n_pt_out = -1
    do i = 0,n_pt_in
--- a/src/bitmask/bitmasks_routines.irp.f
+++ b/src/bitmask/bitmasks_routines.irp.f
@ -268,6 +268,21 @@ subroutine print_spindet(string,Nint)

 end

+subroutine print_det_one_dimension(string,Nint)
+  use bitmasks
+  implicit none
+  BEGIN_DOC
+  ! Subroutine to print the content of a determinant using the '+-' notation
+  END_DOC
+  integer, intent(in)            :: Nint
+  integer(bit_kind), intent(in)  :: string(Nint)
+  character*(2048)                :: output(1)
+
+  call bitstring_to_str( output(1), string, Nint )
+  print *,  trim(output(1))
+
+end
+
 logical function is_integer_in_string(bite,string,Nint)
  use bitmasks
 implicit none
--- a/src/cipsi/EZFIO.cfg
+++ b/src/cipsi/EZFIO.cfg
@ -1,9 +1,3 @@
-[pert_2rdm]
-type: logical
-doc: If true, computes the one- and two-body rdms with perturbation theory
-interface: ezfio,provider,ocaml
-default: False
-
 [save_wf_after_selection]
 type: logical
 doc: If true, saves the wave function after the selection, before the diagonalization
@ -40,3 +34,9 @@ doc: Maximum number of excitation for beta determinants with respect to the Hart
 interface: ezfio,ocaml,provider
 default: -1

+[twice_hierarchy_max]
+type: integer
+doc: Twice the maximum hierarchy parameter (excitation degree plus half the seniority number). Using -1 selects all determinants
+interface: ezfio,ocaml,provider
+default: -1
+
--- a/src/cipsi/NEED
+++ b/src/cipsi/NEED
@ -2,5 +2,4 @@ perturbation
 zmq
 mpi
 iterations
-two_body_rdm
 csf
--- a/src/cipsi/pt2_stoch_routines.irp.f
+++ b/src/cipsi/pt2_stoch_routines.irp.f
@ -133,7 +133,7 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
  PROVIDE psi_bilinear_matrix_transp_order psi_selectors_coef_transp psi_det_sorted
  PROVIDE psi_det_hii selection_weight pseudo_sym
  PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
-  PROVIDE pert_2rdm excitation_beta_max  excitation_alpha_max excitation_max
+  PROVIDE excitation_beta_max  excitation_alpha_max excitation_max

  if (h0_type == 'CFG') then
    PROVIDE psi_configuration_hii det_to_configuration
@ -288,11 +288,12 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
      call write_int(6,nproc_target,'Number of threads for PT2')
      call write_double(6,mem,'Memory (Gb)')

-      call omp_set_max_active_levels(1)
+      call set_multiple_levels_omp(.False.)
+!      call omp_set_max_active_levels(1)


      print '(A)', '========== ======================= ===================== ===================== ==========='
-      print '(A)', ' Samples          Energy                Variance               Norm^2          Seconds'
+      print '(A)', ' Samples          Energy                Variance               Norm^2            Seconds'
      print '(A)', '========== ======================= ===================== ===================== ==========='

      PROVIDE global_selection_buffer
@ -315,14 +316,15 @@ subroutine ZMQ_pt2(E, pt2_data, pt2_data_err, relative_error, N_in)
      endif
      !$OMP END PARALLEL
      call end_parallel_job(zmq_to_qp_run_socket, zmq_socket_pull, 'pt2')
-      call omp_set_max_active_levels(8)
+      call set_multiple_levels_omp(.True.)
+!      call omp_set_max_active_levels(8)

      print '(A)', '========== ======================= ===================== ===================== ==========='

-    do k=1,N_states
-      pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
-    enddo
-    SOFT_TOUCH pt2_overlap
+      do k=1,N_states
+        pt2_overlap(pt2_stoch_istate,k) = pt2_data % overlap(k,pt2_stoch_istate)
+      enddo
+      SOFT_TOUCH pt2_overlap

    enddo
    FREE pt2_stoch_istate
@ -524,21 +526,21 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
        ! 1/(N-1.5) : see  Brugger, The American Statistician (23) 4 p. 32 (1969)
        if(c > 2) then
          eqt = dabs((pt2_data_S2(t) % pt2(pt2_stoch_istate) / c) - (pt2_data_S(t) % pt2(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
-          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          eqt = dsqrt(eqt / (dble(c) - 1.5d0))
          pt2_data_err % pt2(pt2_stoch_istate) = eqt

          eqt = dabs((pt2_data_S2(t) % variance(pt2_stoch_istate) / c) - (pt2_data_S(t) % variance(pt2_stoch_istate)/c)**2) ! dabs for numerical stability
-          eqt = sqrt(eqt / (dble(c) - 1.5d0))
+          eqt = dsqrt(eqt / (dble(c) - 1.5d0))
          pt2_data_err % variance(pt2_stoch_istate) = eqt

          eqta(:) = dabs((pt2_data_S2(t) % overlap(:,pt2_stoch_istate) / c) - (pt2_data_S(t) % overlap(:,pt2_stoch_istate)/c)**2) ! dabs for numerical stability
-          eqta(:) = sqrt(eqta(:) / (dble(c) - 1.5d0))
+          eqta(:) = dsqrt(eqta(:) / (dble(c) - 1.5d0))
          pt2_data_err % overlap(:,pt2_stoch_istate) = eqta(:)


          if ((time - time1 > 1.d0) .or. (n==N_det_generators)) then
            time1 = time
-            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.4)', c, &
+            print '(I10, X, F12.6, X, G10.3, X, F10.6, X, G10.3, X, F10.6, X, G10.3, X, F10.1)', c, &
              pt2_data     % pt2(pt2_stoch_istate) +E, &
              pt2_data_err % pt2(pt2_stoch_istate), &
              pt2_data     % variance(pt2_stoch_istate), &
@ -576,11 +578,11 @@ subroutine pt2_collector(zmq_socket_pull, E, relative_error, pt2_data, pt2_data_
      endif
      do i=1,n_tasks
        if(index(i).gt.size(pt2_data_I,1).or.index(i).lt.1)then
-         print*,'PB !!!'
-         print*,'If you see this, send a bug report with the following content'
-         print*,irp_here
-         print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
-         stop -1
+          print*,'PB !!!'
+          print*,'If you see this, send a bug report with the following content'
+          print*,irp_here
+          print*,'i,index(i),size(pt2_data_I,1) = ',i,index(i),size(pt2_data_I,1)
+          stop -1
        endif
        call pt2_add(pt2_data_I(index(i)),1.d0,pt2_data_task(i))
        f(index(i)) -= 1
@ -843,7 +845,7 @@ END_PROVIDER
   do t=1, pt2_N_teeth
     tooth_width = tilde_cW(pt2_n_0(t+1)) - tilde_cW(pt2_n_0(t))
     if (tooth_width == 0.d0) then
-       tooth_width = sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1)))
+       tooth_width = max(1.d-15,sum(tilde_w(pt2_n_0(t):pt2_n_0(t+1))))
     endif
     ASSERT(tooth_width > 0.d0)
     do i=pt2_n_0(t)+1, pt2_n_0(t+1)
--- a/src/cipsi/selection.irp.f
+++ b/src/cipsi/selection.irp.f
@ -195,7 +195,10 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d

  integer                        :: l_a, nmax, idx
  integer, allocatable           :: indices(:), exc_degree(:), iorder(:)
-  double precision, parameter :: norm_thr = 1.d-16
+
+  ! Removed to avoid introducing determinants already presents in the wf
+  !double precision, parameter :: norm_thr = 1.d-16
+
  allocate (indices(N_det),                                          &
      exc_degree(max(N_det_alpha_unique,N_det_beta_unique)))

@ -215,10 +218,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
      i = psi_bilinear_matrix_rows(l_a)
      if (nt + exc_degree(i) <= 4) then
        idx = psi_det_sorted_order(psi_bilinear_matrix_order(l_a))
-        if (psi_average_norm_contrib_sorted(idx) > norm_thr) then
+        ! Removed to avoid introducing determinants already presents in the wf
+        !if (psi_average_norm_contrib_sorted(idx) > norm_thr) then
          indices(k) = idx
          k=k+1
-        endif
+        !endif
      endif
    enddo
  enddo
@ -242,10 +246,11 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d
        idx = psi_det_sorted_order(                                  &
            psi_bilinear_matrix_order(                               &
            psi_bilinear_matrix_transp_order(l_a)))
-        if (psi_average_norm_contrib_sorted(idx) > norm_thr) then
+        ! Removed to avoid introducing determinants already presents in the wf
+        !if (psi_average_norm_contrib_sorted(idx) > norm_thr) then
          indices(k) = idx
          k=k+1
-        endif
+        !endif
      endif
    enddo
  enddo
@ -464,27 +469,21 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d

      allocate (fullminilist (N_int, 2, fullinteresting(0)), &
                    minilist (N_int, 2,     interesting(0)) )
-      if(pert_2rdm)then
-        allocate(coef_fullminilist_rev(N_states,fullinteresting(0)))
-        do i=1,fullinteresting(0)
-          do j = 1, N_states
-            coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j)
-          enddo
-        enddo
-      endif
+!      if(pert_2rdm)then
+!        allocate(coef_fullminilist_rev(N_states,fullinteresting(0)))
+!        do i=1,fullinteresting(0)
+!          do j = 1, N_states
+!            coef_fullminilist_rev(j,i) = psi_coef_sorted(fullinteresting(i),j)
+!          enddo
+!        enddo
+!      endif

      do i=1,fullinteresting(0)
-        do k=1,N_int
-          fullminilist(k,1,i) = psi_det_sorted(k,1,fullinteresting(i))
-          fullminilist(k,2,i) = psi_det_sorted(k,2,fullinteresting(i))
-        enddo
+        fullminilist(:,:,i) = psi_det_sorted(:,:,fullinteresting(i))
      enddo

      do i=1,interesting(0)
-        do k=1,N_int
-          minilist(k,1,i) = psi_det_sorted(k,1,interesting(i))
-          minilist(k,2,i) = psi_det_sorted(k,2,interesting(i))
-        enddo
+        minilist(:,:,i) = psi_det_sorted(:,:,interesting(i))
      enddo

      do s2=s1,2
@ -531,19 +530,19 @@ subroutine select_singles_and_doubles(i_generator,hole_mask,particle_mask,fock_d

            call splash_pq(mask, sp, minilist, i_generator, interesting(0), bannedOrb, banned, mat, interesting)

-            if(.not.pert_2rdm)then
+!            if(.not.pert_2rdm)then
             call fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf)
-            else
-             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
-            endif
+!            else
+!             call fill_buffer_double_rdm(i_generator, sp, h1, h2, bannedOrb, banned, fock_diag_tmp, E0, pt2_data, mat, buf,fullminilist, coef_fullminilist_rev, fullinteresting(0))
+!            endif
          end if
        enddo
        if(s1 /= s2) monoBdo = .false.
      enddo
      deallocate(fullminilist,minilist)
-      if(pert_2rdm)then
-        deallocate(coef_fullminilist_rev)
-      endif
+!      if(pert_2rdm)then
+!        deallocate(coef_fullminilist_rev)
+!      endif
    enddo
  enddo
  deallocate(preinteresting, prefullinteresting, interesting, fullinteresting)
@ -713,6 +712,25 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
        if (do_cycle) cycle
      endif

+      if (twice_hierarchy_max >= 0) then
+        s = 0
+        do k=1,N_int
+          s = s + popcnt(ieor(det(k,1),det(k,2)))
+        enddo
+        if ( mod(s,2)>0 ) stop 'For now, hierarchy CI is defined only for an even number of electrons'
+        if (excitation_ref == 1) then
+          call get_excitation_degree(HF_bitmask,det(1,1),degree,N_int)
+        else if (excitation_ref == 2) then
+          stop 'For now, hierarchy CI is defined only for a single reference determinant'
+!         do k=1,N_dominant_dets_of_cfgs
+!           call get_excitation_degree(dominant_dets_of_cfgs(1,1,k),det(1,1),degree,N_int)
+!         enddo
+        endif
+        integer :: twice_hierarchy
+        twice_hierarchy = degree + s/2
+        if (twice_hierarchy > twice_hierarchy_max) cycle
+      endif
+
      Hii = diag_H_mat_elem_fock(psi_det_generators(1,1,i_generator),det,fock_diag_tmp,N_int)

      w = 0d0
@ -834,8 +852,28 @@ subroutine fill_buffer_double(i_generator, sp, h1, h2, bannedOrb, banned, fock_d
            endif

        end select
+
+        ! To force the inclusion of determinants with a positive pt2 contribution
+        if (e_pert(istate) > 1d-8) then
+          w = -huge(1.0)
+        endif
+
      end do

+!!!BEGIN_DEBUG
+!      ! To check if the pt2 is taking determinants already in the wf
+!      if (is_in_wavefunction(det(N_int,1),N_int)) then
+!        logical, external :: is_in_wavefunction
+!        print*, 'A determinant contributing to the pt2 is already in'
+!        print*, 'the wave function:'
+!        call  print_det(det(N_int,1),N_int)
+!        print*,'contribution to the pt2 for the states:', e_pert(:)
+!        print*,'error in the filtering in'
+!        print*, 'cipsi/selection.irp.f sub:  selecte_singles_and_doubles'
+!        print*, 'abort'
+!        call abort
+!      endif
+!!!END_DEBUG

      integer(bit_kind) :: occ(N_int,2), n
      if (h0_type == 'CFG') then
@ -1556,7 +1594,7 @@ subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
-  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  ! Gives the indices(+1) of the bits set to 1 in the bit string
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: string(Nint)
--- a/src/cipsi/selection_buffer.irp.f
+++ b/src/cipsi/selection_buffer.irp.f
@ -60,6 +60,7 @@ subroutine add_to_selection_buffer(b, det, val)
    b%val(b%cur) = val
    if(b%cur == size(b%val)) then
      call sort_selection_buffer(b)
+      b%cur = b%cur-1
    end if
  end if
 end subroutine
@ -86,43 +87,56 @@ subroutine merge_selection_buffers(b1, b2)
  double precision :: rss
  double precision, external :: memory_of_double
  sze = max(size(b1%val), size(b2%val))
-  rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
-  call check_mem(rss,irp_here)
+!  rss = memory_of_double(sze) + 2*N_int*memory_of_double(sze)
+!  call check_mem(rss,irp_here)
  allocate(val(sze), detmp(N_int, 2, sze))
  i1=1
  i2=1
-  do i=1,nmwen
-    if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
-      exit
-    else if (i1 > b1%cur) then
-        val(i) = b2%val(i2)
-        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
-        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
-        i2=i2+1
-    else if (i2 > b2%cur) then
-        val(i) = b1%val(i1)
-        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
-        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
-        i1=i1+1
-    else
-      if (b1%val(i1) <= b2%val(i2)) then
-        val(i) = b1%val(i1)
-        detmp(1:N_int,1,i) = b1%det(1:N_int,1,i1)
-        detmp(1:N_int,2,i) = b1%det(1:N_int,2,i1)
-        i1=i1+1
+
+  select case (N_int)
+BEGIN_TEMPLATE 
+  case $case
+    do i=1,nmwen
+      if ( (i1 > b1%cur).and.(i2 > b2%cur) ) then
+        exit
+      else if (i1 > b1%cur) then
+          val(i) = b2%val(i2)
+          detmp(1:$N_int,1,i) = b2%det(1:$N_int,1,i2)
+          detmp(1:$N_int,2,i) = b2%det(1:$N_int,2,i2)
+          i2=i2+1
+      else if (i2 > b2%cur) then
+          val(i) = b1%val(i1)
+          detmp(1:$N_int,1,i) = b1%det(1:$N_int,1,i1)
+          detmp(1:$N_int,2,i) = b1%det(1:$N_int,2,i1)
+          i1=i1+1
      else
-        val(i) = b2%val(i2)
-        detmp(1:N_int,1,i) = b2%det(1:N_int,1,i2)
-        detmp(1:N_int,2,i) = b2%det(1:N_int,2,i2)
-        i2=i2+1
+        if (b1%val(i1) <= b2%val(i2)) then
+          val(i) = b1%val(i1)
+          detmp(1:$N_int,1,i) = b1%det(1:$N_int,1,i1)
+          detmp(1:$N_int,2,i) = b1%det(1:$N_int,2,i1)
+          i1=i1+1
+        else
+          val(i) = b2%val(i2)
+          detmp(1:$N_int,1,i) = b2%det(1:$N_int,1,i2)
+          detmp(1:$N_int,2,i) = b2%det(1:$N_int,2,i2)
+          i2=i2+1
+        endif
      endif
-    endif
-  enddo
+    enddo
+    do i=nmwen+1,b2%N
+      val(i) = 0.d0
+!      detmp(1:$N_int,1,i) = 0_bit_kind
+!      detmp(1:$N_int,2,i) = 0_bit_kind
+    enddo
+SUBST [ case, N_int ]
+(1); 1;;
+(2); 2;;
+(3); 3;;
+(4); 4;;
+default; N_int;;
+END_TEMPLATE
+  end select
  deallocate(b2%det, b2%val)
-  do i=nmwen+1,b2%N
-    val(i) = 0.d0
-    detmp(1:N_int,1:2,i) = 0_bit_kind
-  enddo
  b2%det => detmp
  b2%val => val
  b2%mini = min(b2%mini,b2%val(b2%N))
@ -144,8 +158,8 @@ subroutine sort_selection_buffer(b)

  double precision :: rss
  double precision, external :: memory_of_double, memory_of_int
-  rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3))
-  call check_mem(rss,irp_here)
+!  rss = memory_of_int(b%cur) + 2*N_int*memory_of_double(size(b%det,3))
+!  call check_mem(rss,irp_here)
  allocate(iorder(b%cur), detmp(N_int, 2, size(b%det,3)))
  do i=1,b%cur
    iorder(i) = i
@ -225,14 +239,14 @@ subroutine make_selection_buffer_s2(b)
      endif
      dup = .True.
      do k=1,N_int
-        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j))                   &
-              .or. (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
+        if ( (tmp_array(k,1,i) /= tmp_array(k,1,j)) .or. &
+             (tmp_array(k,2,i) /= tmp_array(k,2,j)) ) then
          dup = .False.
          exit
        endif
      enddo
      if (dup) then
-        val(i) = max(val(i), val(j))
+        val(i) = min(val(i), val(j))
        duplicate(j) = .True.
      endif
      j+=1
@ -282,9 +296,6 @@ subroutine make_selection_buffer_s2(b)
    call configuration_to_dets_size(o(1,1,i),sze,elec_alpha_num,N_int)
    n_d = n_d + sze
    if (n_d > b%cur) then
-!      if (n_d - b%cur > b%cur - n_d + sze) then
-!        n_d = n_d - sze
-!      endif
      exit
    endif
  enddo
@ -329,10 +340,11 @@ subroutine remove_duplicates_in_selection_buffer(b)
  integer(bit_kind), allocatable :: tmp_array(:,:,:)
  logical, allocatable           :: duplicate(:)

-  n_d = b%cur
  logical                        :: found_duplicates
  double precision               :: rss
  double precision, external     :: memory_of_double
+
+  n_d = b%cur
  rss = (4*N_int+4)*memory_of_double(n_d)
  call check_mem(rss,irp_here)

--- a/src/cipsi/selection_weight.irp.f
+++ b/src/cipsi/selection_weight.irp.f
@ -38,11 +38,11 @@ subroutine update_pt2_and_variance_weights(pt2_data, N_st)

  avg = sum(pt2(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero

-  dt = 8.d0 !* selection_factor
+  dt = 4.d0 !* selection_factor
  do k=1,N_st
-    element = exp(dt*(pt2(k)/avg - 1.d0))
-    element = min(2.0d0 , element)
-    element = max(0.5d0 , element)
+    element = pt2(k) !exp(dt*(pt2(k)/avg - 1.d0))
+!    element = min(2.0d0 , element)
+!    element = max(0.5d0 , element)
    pt2_match_weight(k) *= element
  enddo

@ -50,9 +50,9 @@ subroutine update_pt2_and_variance_weights(pt2_data, N_st)
  avg = sum(variance(1:N_st)) / dble(N_st) + 1.d-32 ! Avoid future division by zero

  do k=1,N_st
-    element = exp(dt*(variance(k)/avg -1.d0))
-    element = min(2.0d0 , element)
-    element = max(0.5d0 , element)
+    element = variance(k) ! exp(dt*(variance(k)/avg -1.d0))
+!    element = min(2.0d0 , element)
+!    element = max(0.5d0 , element)
    variance_match_weight(k) *= element
  enddo

@ -62,6 +62,9 @@ subroutine update_pt2_and_variance_weights(pt2_data, N_st)
    variance_match_weight(:) = 1.d0
  endif

+  pt2_match_weight(:) = pt2_match_weight(:)/sum(pt2_match_weight(:))
+  variance_match_weight(:) = variance_match_weight(:)/sum(variance_match_weight(:))
+
  threshold_davidson_pt2 = min(1.d-6, &
     max(threshold_davidson, 1.e-1 * PT2_relative_error * minval(abs(pt2(1:N_states)))) )

@ -87,7 +90,7 @@ BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
      selection_weight(1:N_states) = c0_weight(1:N_states)

     case (2)
-      print *,  'Using pt2-matching weight in selection'
+      print *,  'Using PT2-matching weight in selection'
      selection_weight(1:N_states) = c0_weight(1:N_states) * pt2_match_weight(1:N_states)
      print *, '# PT2 weight ', real(pt2_match_weight(:),4)

@ -97,7 +100,7 @@ BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
      print *, '# var weight ', real(variance_match_weight(:),4)

     case (4)
-      print *,  'Using variance- and pt2-matching weights in selection'
+      print *,  'Using variance- and PT2-matching weights in selection'
      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states))
      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
      print *, '# var weight ', real(variance_match_weight(:),4)
@ -112,7 +115,7 @@ BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
      selection_weight(1:N_states) = c0_weight(1:N_states)

     case (7)
-      print *,  'Input weights multiplied by variance- and pt2-matching'
+      print *,  'Input weights multiplied by variance- and PT2-matching'
      selection_weight(1:N_states) = c0_weight(1:N_states) * sqrt(variance_match_weight(1:N_states) * pt2_match_weight(1:N_states)) * state_average_weight(1:N_states)
      print *, '# PT2 weight ', real(pt2_match_weight(:),4)
      print *, '# var weight ', real(variance_match_weight(:),4)
@ -128,6 +131,7 @@ BEGIN_PROVIDER [ double precision, selection_weight, (N_states) ]
      print *, '# var weight ', real(variance_match_weight(:),4)

    end select
+     selection_weight(:) = selection_weight(:)/sum(selection_weight(:))
     print *, '# Total weight ', real(selection_weight(:),4)

 END_PROVIDER
--- a/src/cipsi/slave_cipsi.irp.f
+++ b/src/cipsi/slave_cipsi.irp.f
@ -4,7 +4,8 @@ subroutine run_slave_cipsi
 ! Helper program for distributed parallelism
  END_DOC

-  call omp_set_max_active_levels(1)
+  call set_multiple_levels_omp(.False.)
+!  call omp_set_max_active_levels(1)
  distributed_davidson = .False.
  read_wf = .False.
  SOFT_TOUCH read_wf distributed_davidson
@ -171,9 +172,11 @@ subroutine run_slave_main
      call write_double(6,(t1-t0),'Broadcast time')

      !---
-      call omp_set_max_active_levels(8)
+      call set_multiple_levels_omp(.True.)
+!      call omp_set_max_active_levels(8)
      call davidson_slave_tcp(0)
-      call omp_set_max_active_levels(1)
+      call set_multiple_levels_omp(.False.)
+!      call omp_set_max_active_levels(1)
      print *,  mpi_rank, ': Davidson done'
      !---

--- a/src/cipsi/zmq_selection.irp.f
+++ b/src/cipsi/zmq_selection.irp.f
@ -22,7 +22,7 @@ subroutine ZMQ_selection(N_in, pt2_data)
    PROVIDE psi_bilinear_matrix_transp_rows_loc psi_bilinear_matrix_transp_columns
    PROVIDE psi_bilinear_matrix_transp_order selection_weight pseudo_sym
    PROVIDE n_act_orb n_inact_orb n_core_orb n_virt_orb n_del_orb seniority_max
-    PROVIDE pert_2rdm excitation_beta_max  excitation_alpha_max excitation_max
+    PROVIDE excitation_beta_max  excitation_alpha_max excitation_max

    call new_parallel_job(zmq_to_qp_run_socket,zmq_socket_pull,'selection')

--- a/src/cis/cis.irp.f
+++ b/src/cis/cis.irp.f
@ -62,6 +62,7 @@ subroutine run
  else
   call H_apply_cis
  endif
+  print*,''
  print *,  'N_det = ', N_det
  print*,'******************************'
  print *,  'Energies  of the states:'
@ -69,11 +70,13 @@ subroutine run
    print *,  i, CI_energy(i)
  enddo
  if (N_states > 1) then
-    print*,'******************************'
-    print*,'Excitation energies '
+    print*,''
+    print*,'******************************************************'
+    print*,'Excitation energies (au)                     (eV)'
    do i = 2, N_states
-      print*, i ,CI_energy(i) - CI_energy(1)
+      print*, i ,CI_energy(i) - CI_energy(1), (CI_energy(i) - CI_energy(1))/0.0367502d0
    enddo
+    print*,''
  endif

  call ezfio_set_cis_energy(CI_energy)
--- a/src/cisd/30.cisd.bats
+++ b/src/cisd/30.cisd.bats
@ -77,7 +77,7 @@ function run() {
  [[ -n $TRAVIS ]] && skip
  qp set_file ch4.ezfio
  qp set_mo_class --core="[1]" --act="[2-30]" --del="[31-59]"
-  run -40.2403962667047 -39.843315
+  run -40.2403962667047 -39.8433221754964
 }

@test "SiH3" { # 20.2202s 1.38648m
--- a/src/cisd/cisd.irp.f
+++ b/src/cisd/cisd.irp.f
@ -63,7 +63,7 @@ subroutine run
  endif
  psi_coef = ci_eigenvectors
  SOFT_TOUCH psi_coef
-  call save_wavefunction
+  call save_wavefunction_truncated(save_threshold)
  call ezfio_set_cisd_energy(CI_energy)

  do i = 1,N_states
--- a/src/davidson/davidson_parallel.irp.f
+++ b/src/davidson/davidson_parallel.irp.f
@ -508,7 +508,8 @@ subroutine H_S2_u_0_nstates_zmq(v_0,s_0,u_0,N_st,sze)
  endif


-  call omp_set_max_active_levels(5)
+  call set_multiple_levels_omp(.True.)
+!  call omp_set_max_active_levels(5)

  !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
  ithread = omp_get_thread_num()
@ -546,21 +547,6 @@ end



-!BEGIN_PROVIDER [ integer, nthreads_davidson ]
-! implicit none
-! BEGIN_DOC
-! ! Number of threads for Davidson
-! END_DOC
-! nthreads_davidson = nproc
-! character*(32) :: env
-! call getenv('QP_NTHREADS_DAVIDSON',env)
-! if (trim(env) /= '') then
-!   read(env,*) nthreads_davidson
-!   call write_int(6,nthreads_davidson,'Target number of threads for <Psi|H|Psi>')
-! endif
-!END_PROVIDER
-
-
 integer function zmq_put_N_states_diag(zmq_to_qp_run_socket,worker_id)
  use f77_zmq
  implicit none
--- a/src/davidson/davidson_parallel_csf.irp.f
+++ b/src/davidson/davidson_parallel_csf.irp.f
@ -464,7 +464,8 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
    print *,  irp_here, ': Failed in zmq_set_running'
  endif

-  call omp_set_max_active_levels(4)
+  call set_multiple_levels_omp(.True.)
+!  call omp_set_max_active_levels(4)
  !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
  ithread = omp_get_thread_num()
  if (ithread == 0 ) then
--- a/src/davidson/davidson_parallel_nos2.irp.f
+++ b/src/davidson/davidson_parallel_nos2.irp.f
@ -464,7 +464,8 @@ subroutine H_u_0_nstates_zmq(v_0,u_0,N_st,sze)
    print *,  irp_here, ': Failed in zmq_set_running'
  endif

-  call omp_set_max_active_levels(4)
+  call set_multiple_levels_omp(.True.)
+!  call omp_set_max_active_levels(4)
  !$OMP PARALLEL DEFAULT(shared) NUM_THREADS(2) PRIVATE(ithread)
  ithread = omp_get_thread_num()
  if (ithread == 0 ) then
--- a/src/davidson/diagonalization_hs2_dressed.irp.f
+++ b/src/davidson/diagonalization_hs2_dressed.irp.f
@ -14,15 +14,6 @@ BEGIN_PROVIDER [ character*(64), diag_algorithm ]
  endif
 END_PROVIDER

-!BEGIN_PROVIDER [ double precision, threshold_davidson_pt2 ]
-! implicit none
-! BEGIN_DOC
-! ! Threshold of Davidson's algorithm, using PT2 as a guide
-! END_DOC
-! threshold_davidson_pt2 = threshold_davidson
-!
-!END_PROVIDER
-


 BEGIN_PROVIDER [ integer, dressed_column_idx, (N_states) ]
@ -154,7 +145,7 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
  character*(16384)              :: write_buffer
  double precision               :: to_print(3,N_st)
  double precision               :: cpu, wall
-  integer                        :: shift, shift2, itermax, istate, ii
+  integer                        :: shift, shift2, itermax, istate
  double precision               :: r1, r2, alpha
  logical                        :: state_ok(N_st_diag_in*davidson_sze_max)
  integer                        :: nproc_target
@ -354,27 +345,20 @@ subroutine davidson_diag_hjj_sjj(dets_in,u_in,H_jj,s2_out,energies,dim_in,sze,N_
      shift  = N_st_diag*(iter-1)
      shift2 = N_st_diag*iter

-      if ((iter > 1).or.(itertot == 1)) then
+!      if ((iter > 1).or.(itertot == 1)) then
        ! Compute |W_k> = \sum_i |i><i|H|u_k>
        ! -----------------------------------

        if ((sze > 100000).and.distributed_davidson) then
            call H_S2_u_0_nstates_zmq   (W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
        else
-            double precision               :: irp_rdtsc
-            double precision               :: ticks_0, ticks_1
-            integer*8                      :: irp_imax
-            irp_imax = 1
-            !ticks_0 = irp_rdtsc()
            call H_S2_u_0_nstates_openmp(W(1,shift+1),S_d,U(1,shift+1),N_st_diag,sze)
-            !ticks_1 = irp_rdtsc()
-            !print *,' ----Cycles:',(ticks_1-ticks_0)/dble(irp_imax)," ----"
        endif
        S(1:sze,shift+1:shift+N_st_diag) = real(S_d(1:sze,1:N_st_diag))
-      else
-         ! Already computed in update below
-         continue
-      endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif

      if (dressing_state > 0) then

--- a/src/davidson/diagonalization_nonsym_h_dressed.irp.f
+++ b/src/davidson/diagonalization_nonsym_h_dressed.irp.f
@ -317,7 +317,7 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze,
      shift  = N_st_diag*(iter-1)
      shift2 = N_st_diag*iter

-      if( (iter > 1) .or. (itertot == 1) ) then
+!      if( (iter > 1) .or. (itertot == 1) ) then

        ! Gram-Schmidt to orthogonalize all new guess with the previous vectors 
        call ortho_qr(U, size(U, 1), sze, shift2)
@ -331,10 +331,10 @@ subroutine davidson_diag_nonsym_hjj(dets_in, u_in, H_jj, energies, dim_in, sze,
        else
          call H_u_0_nstates_openmp(W(1,shift+1), U(1,shift+1), N_st_diag, sze)
        endif
-      else
-         ! Already computed in update below
-         continue
-      endif
+!      else
+!         ! Already computed in update below
+!         continue
+!      endif

      if(dressing_state > 0) then

--- a/src/davidson/diagonalize_ci.irp.f
+++ b/src/davidson/diagonalize_ci.irp.f
@ -299,6 +299,7 @@ subroutine diagonalize_CI
 !  eigenstates of the |CI| matrix.
  END_DOC
  integer                        :: i,j
+  PROVIDE distributed_davidson
  do j=1,N_states
    do i=1,N_det
      psi_coef(i,j) = CI_eigenvectors(i,j)
--- a/src/davidson/input.irp.f
+++ b/src/davidson/input.irp.f
@ -1,39 +0,0 @@
-!BEGIN_PROVIDER [ integer, n_states_diag  ]
-!  implicit none
-!  BEGIN_DOC
-!! Number of states to consider during the Davdison diagonalization
-!  END_DOC
-!
-!  logical                        :: has
-!  PROVIDE ezfio_filename
-!  if (mpi_master) then
-!
-!    call ezfio_has_davidson_n_states_diag(has)
-!    if (has) then
-!      call ezfio_get_davidson_n_states_diag(n_states_diag)
-!    else
-!      print *, 'davidson/n_states_diag not found in EZFIO file'
-!      stop 1
-!    endif
-!    n_states_diag = max(2,N_states * N_states_diag)
-!  endif
-!  IRP_IF MPI_DEBUG
-!    print *,  irp_here, mpi_rank
-!    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
-!  IRP_ENDIF
-!  IRP_IF MPI
-!    include 'mpif.h'
-!    integer :: ierr
-!    call MPI_BCAST( n_states_diag, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
-!    if (ierr /= MPI_SUCCESS) then
-!      stop 'Unable to read n_states_diag with MPI'
-!    endif
-!  IRP_ENDIF
-!
-!  call write_time(6)
-!  if (mpi_master) then
-!    write(6, *) 'Read  n_states_diag'
-!  endif
-!
-!END_PROVIDER
-!
--- a/src/davidson_keywords/README.rst
+++ b/src/davidson_keywords/README.rst
@ -2,3 +2,4 @@
 davidson_keywords
 =================

+Keywords used for Davidson algorithms.
--- a/src/determinants/EZFIO.cfg
+++ b/src/determinants/EZFIO.cfg
@ -42,13 +42,13 @@ default: 2

 [weight_selection]
 type: integer
-doc: Weight used in the selection. 0: input state-average weight, 1: 1./(c_0^2), 2: rPT2 matching, 3: variance matching, 4: variance and rPT2 matching, 5: variance minimization and matching, 6: CI coefficients 7: input state-average multiplied by variance and rPT2 matching 8: input state-average multiplied by rPT2 matching 9: input state-average multiplied by variance matching
+doc: Weight used in the selection. 0: input state-average weight, 1: 1./(c_0^2), 2: PT2 matching, 3: variance matching, 4: variance and PT2 matching, 5: variance minimization and matching, 6: CI coefficients 7: input state-average multiplied by variance and PT2 matching 8: input state-average multiplied by PT2 matching 9: input state-average multiplied by variance matching
 interface: ezfio,provider,ocaml
 default: 1

 [threshold_generators]
 type: Threshold
-doc: Thresholds on generators (fraction of the square of the norm) 
+doc: Thresholds on generators (fraction of the square of the norm)
 interface: ezfio,provider,ocaml
 default: 0.999

@ -80,7 +80,7 @@ type: integer
 [psi_coef]
 interface: ezfio
 doc: Coefficients of the wave function
-type: double precision 
+type: double precision
 size: (determinants.n_det,determinants.n_states)

 [psi_det]
@ -92,7 +92,7 @@ size: (determinants.n_int*determinants.bit_kind/8,2,determinants.n_det)
 [psi_coef_qp_edit]
 interface: ezfio
 doc: Coefficients of the wave function
-type: double precision 
+type: double precision
 size: (determinants.n_det_qp_edit,determinants.n_states)

 [psi_det_qp_edit]
@ -126,13 +126,13 @@ default: 1.

 [thresh_sym]
 type: Threshold
-doc: Thresholds to check if a determinant is connected with HF 
+doc: Thresholds to check if a determinant is connected with HF
 interface: ezfio,provider,ocaml
 default: 1.e-15

 [pseudo_sym]
 type: logical
-doc: If |true|, discard any Slater determinants with an interaction smaller than thresh_sym with HF. 
+doc: If |true|, discard any Slater determinants with an interaction smaller than thresh_sym with HF.
 interface: ezfio,provider,ocaml
 default: False

--- a/src/determinants/s2.irp.f
+++ b/src/determinants/s2.irp.f
@ -103,13 +103,17 @@ BEGIN_PROVIDER [ double precision, expected_s2]

 END_PROVIDER

-BEGIN_PROVIDER [ double precision, s2_values, (N_states) ]
+ BEGIN_PROVIDER [ double precision, s2_values, (N_states) ]
+&BEGIN_PROVIDER [ double precision, s_values, (N_states) ]
 implicit none
 BEGIN_DOC
 ! array of the averaged values of the S^2 operator on the various states
 END_DOC
 integer :: i
 call u_0_S2_u_0(s2_values,psi_coef,n_det,psi_det,N_int,N_states,psi_det_size)
+ do i = 1, N_states
+  s_values(i) = 0.5d0 *(-1.d0 + dsqrt(1.d0 + 4.d0 * s2_values(i)))
+ enddo

 END_PROVIDER

--- a/src/determinants/slater_rules.irp.f
+++ b/src/determinants/slater_rules.irp.f
@ -438,7 +438,7 @@ subroutine bitstring_to_list_ab( string, list, n_elements, Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
-  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  ! Gives the indices(+1) of the bits set to 1 in the bit string
  ! For alpha/beta determinants.
  END_DOC
  integer, intent(in)            :: Nint
--- a/src/dressing/alpha_factory.irp.f
+++ b/src/dressing/alpha_factory.irp.f
@ -1179,7 +1179,7 @@ subroutine bitstring_to_list_in_selection( string, list, n_elements, Nint)
  use bitmasks
  implicit none
  BEGIN_DOC
-  ! Gives the inidices(+1) of the bits set to 1 in the bit string
+  ! Gives the indices(+1) of the bits set to 1 in the bit string
  END_DOC
  integer, intent(in)            :: Nint
  integer(bit_kind), intent(in)  :: string(Nint)
--- a/src/dressing/run_dress_slave.irp.f
+++ b/src/dressing/run_dress_slave.irp.f
@ -72,7 +72,8 @@ subroutine run_dress_slave(thread,iproce,energy)
  provide psi_energy
  ending = dress_N_cp+1
  ntask_tbd = 0
-  call omp_set_max_active_levels(8)
+  call set_multiple_levels_omp(.True.)
+!  call omp_set_max_active_levels(8)

  !$OMP PARALLEL DEFAULT(SHARED) &
  !$OMP PRIVATE(interesting, breve_delta_m, task_id) &
@ -84,7 +85,8 @@ subroutine run_dress_slave(thread,iproce,energy)
  zmq_socket_push      = new_zmq_push_socket(thread)
  integer, external :: connect_to_taskserver
  !$OMP CRITICAL
-  call omp_set_max_active_levels(1)
+  call set_multiple_levels_omp(.False.)
+!  call omp_set_max_active_levels(1)
  if (connect_to_taskserver(zmq_to_qp_run_socket,worker_id,thread) == -1) then
    print *,  irp_here, ': Unable to connect to task server'
    stop -1
@ -296,7 +298,8 @@ subroutine run_dress_slave(thread,iproce,energy)
  !$OMP END CRITICAL

  !$OMP END PARALLEL
-  call omp_set_max_active_levels(1)
+  call set_multiple_levels_omp(.False.)
+!  call omp_set_max_active_levels(1)
 !  do i=0,dress_N_cp+1
 !    call omp_destroy_lock(lck_sto(i))
 !  end do
--- a/src/ezfio_files/output.irp.f
+++ b/src/ezfio_files/output.irp.f
@ -25,7 +25,7 @@ subroutine write_time(iunit)
  ct = ct - output_cpu_time_0
  call wall_time(wt)
  wt = wt - output_wall_time_0
-  write(6,'(A,F14.6,A,F14.6,A)') &
+  write(6,'(A,F14.2,A,F14.2,A)') &
    '.. >>>>> [ WALL TIME: ', wt, '  s ] [ CPU  TIME: ', ct, '  s ] <<<<< ..'
  write(6,*)
 end
--- a/src/iterations/print_summary.irp.f
+++ b/src/iterations/print_summary.irp.f
@ -98,7 +98,7 @@ subroutine print_summary(e_,pt2_data,pt2_data_err,n_det_,n_configuration_,n_st,s
    enddo
  endif

-  call print_energy_components()
+!  call print_energy_components()

 end subroutine

--- a/src/mo_basis/mos_in_r.irp.f
+++ b/src/mo_basis/mos_in_r.irp.f
@ -1,6 +1,9 @@

 subroutine give_all_mos_at_r(r,mos_array)
 implicit none
+ BEGIN_DOC
+! mos_array(i) = ith MO function evaluated at "r"
+ END_DOC
 double precision, intent(in) :: r(3)
 double precision, intent(out) :: mos_array(mo_num)
 double precision :: aos_array(ao_num)
--- a/src/scf_utils/scf_density_matrix_ao.irp.f
+++ b/src/scf_utils/scf_density_matrix_ao.irp.f
@ -3,12 +3,13 @@ BEGIN_PROVIDER [double precision, SCF_density_matrix_ao_alpha, (ao_num,ao_num) ]
   BEGIN_DOC
   ! $C.C^t$ over $\alpha$ MOs
   END_DOC
-   SCF_density_matrix_ao_alpha = 0.d0
-   if(elec_alpha_num.gt.0)then
-    call dgemm('N','T',ao_num,ao_num,elec_alpha_num,1.d0, &
-         mo_coef, size(mo_coef,1), &
-         mo_coef, size(mo_coef,1), 0.d0, &
-         SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha,1))
+   if(elec_alpha_num > 0)then
+     call dgemm('N','T',ao_num,ao_num,elec_alpha_num,1.d0, &
+          mo_coef, size(mo_coef,1), &
+          mo_coef, size(mo_coef,1), 0.d0, &
+          SCF_density_matrix_ao_alpha, size(SCF_density_matrix_ao_alpha,1))
+   else
+     SCF_density_matrix_ao_alpha = 0.d0
   endif

 !  integer          :: i, j
@ -29,12 +30,13 @@ BEGIN_PROVIDER [ double precision, SCF_density_matrix_ao_beta,  (ao_num,ao_num)
   BEGIN_DOC
   ! $C.C^t$ over $\beta$ MOs
   END_DOC
-   SCF_density_matrix_ao_beta = 0.d0
-   if(elec_beta_num.gt.0)then
-   call dgemm('N','T',ao_num,ao_num,elec_beta_num,1.d0, &
-        mo_coef, size(mo_coef,1), &
-        mo_coef, size(mo_coef,1), 0.d0, &
-        SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta,1))
+   if(elec_beta_num > 0)then
+     call dgemm('N','T',ao_num,ao_num,elec_beta_num,1.d0, &
+          mo_coef, size(mo_coef,1), &
+          mo_coef, size(mo_coef,1), 0.d0, &
+          SCF_density_matrix_ao_beta, size(SCF_density_matrix_ao_beta,1))
+   else
+     SCF_density_matrix_ao_beta = 0.d0
   endif

 END_PROVIDER
--- a/src/tools/NEED
+++ b/src/tools/NEED
@ -3,3 +3,4 @@ mo_two_e_erf_ints
 aux_quantities
 hartree_fock
 dft_utils_in_r
+two_body_rdm
--- a/src/tools/print_dipole.irp.f
+++ b/src/tools/print_dipole.irp.f
@ -1,5 +1,7 @@
 program print_dipole
 implicit none
+ read_wf = .True.
+ SOFT_TOUCH read_wf
 call print_z_dipole_moment_only

 end
--- a/src/tools/truncate_wf.irp.f
+++ b/src/tools/truncate_wf.irp.f
@ -0,0 +1,98 @@
+program truncate_wf
+ implicit none
+ BEGIN_DOC
+! Truncate the wave function
+ END_DOC
+ read_wf = .True.
+ if (s2_eig) then
+  call routine_s2
+ else
+  call routine
+ endif
+end
+
+subroutine routine
+ implicit none
+ integer :: ndet_max
+ print*, 'Max number of determinants ?'
+ read(5,*) ndet_max
+ integer(bit_kind), allocatable :: psi_det_tmp(:,:,:)
+ double precision, allocatable :: psi_coef_tmp(:,:)
+ allocate(psi_det_tmp(N_int,2,ndet_max),psi_coef_tmp(ndet_max, N_states))
+ 
+ integer :: i,j
+ double precision :: accu(N_states)
+ accu = 0.d0
+ do i = 1, ndet_max
+  do j = 1, N_int
+   psi_det_tmp(j,1,i) = psi_det_sorted(j,1,i)
+   psi_det_tmp(j,2,i) = psi_det_sorted(j,2,i)
+  enddo
+  do j = 1, N_states
+   psi_coef_tmp(i,j) = psi_coef_sorted(i,j)
+   accu(j) += psi_coef_tmp(i,j) **2
+  enddo
+ enddo
+ do j = 1, N_states
+  accu(j) = 1.d0/dsqrt(accu(j))
+ enddo
+ do j = 1, N_states
+  do i = 1, ndet_max
+   psi_coef_tmp(i,j) = psi_coef_tmp(i,j) * accu(j)
+  enddo
+ enddo
+
+ call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,size(psi_coef_tmp,1),psi_coef_tmp)
+ 
+end
+
+subroutine routine_s2
+ implicit none
+ integer :: ndet_max
+ double precision :: wmin
+ integer(bit_kind), allocatable :: psi_det_tmp(:,:,:)
+ double precision, allocatable :: psi_coef_tmp(:,:)
+ integer :: i,j,k
+ double precision :: accu(N_states)
+
+ print *,  'Weights of the CFG'
+ do i=1,N_det
+   print *,  i, real(weight_configuration(det_to_configuration(i),:)), real(sum(weight_configuration(det_to_configuration(i),:)))
+ enddo
+ print*, 'Min weight of the configuration?'
+ read(5,*) wmin
+
+ ndet_max = 0
+ do i=1,N_det
+   if (maxval(weight_configuration( det_to_configuration(i),:)) < wmin) cycle
+   ndet_max = ndet_max+1
+ enddo
+
+ allocate(psi_det_tmp(N_int,2,ndet_max),psi_coef_tmp(ndet_max, N_states))
+ 
+ accu = 0.d0
+ k=0
+ do i = 1, N_det
+  if (maxval(weight_configuration( det_to_configuration(i),:)) < wmin) cycle
+  k = k+1
+  do j = 1, N_int
+   psi_det_tmp(j,1,k) = psi_det(j,1,i)
+   psi_det_tmp(j,2,k) = psi_det(j,2,i)
+  enddo
+  do j = 1, N_states
+   psi_coef_tmp(k,j) = psi_coef(i,j)
+   accu(j) += psi_coef_tmp(k,j)**2
+  enddo
+ enddo
+ do j = 1, N_states
+  accu(j) = 1.d0/dsqrt(accu(j))
+ enddo
+ do j = 1, N_states
+  do i = 1, ndet_max
+   psi_coef_tmp(i,j) = psi_coef_tmp(i,j) * accu(j)
+  enddo
+ enddo
+
+ call save_wavefunction_general(ndet_max,N_states,psi_det_tmp,size(psi_coef_tmp,1),psi_coef_tmp)
+ 
+end
--- a/src/utils/constants.include.F
+++ b/src/utils/constants.include.F
@ -14,7 +14,7 @@ double precision, parameter :: thresh = 1.d-15
 double precision, parameter :: cx_lda = -0.73855876638202234d0
 double precision, parameter :: c_2_4_3 = 2.5198420997897464d0
 double precision, parameter :: cst_lda = -0.93052573634909996d0
-double precision, parameter :: c_4_3 = 1.3333333333333333d0
-double precision, parameter :: c_1_3 = 0.3333333333333333d0
+double precision, parameter :: c_4_3 = 4.d0/3.d0
+double precision, parameter :: c_1_3 = 1.d0/3.d0
 double precision, parameter :: sq_op5 = dsqrt(0.5d0)
 double precision, parameter :: dlog_2pi =  dlog(2.d0*dacos(-1.d0))
--- a/src/utils/memory.irp.f
+++ b/src/utils/memory.irp.f
@ -114,7 +114,7 @@ subroutine print_memory_usage()
  call resident_memory(rss)
  call total_memory(mem)

-  write(*,'(A,F14.6,A,F14.6,A)') &
+  write(*,'(A,F14.3,A,F14.3,A)') &
    '.. >>>>> [ RES  MEM : ', rss , &
        ' GB ] [ VIRT MEM : ', mem, ' GB ] <<<<< ..'
 end
--- a/src/utils/set_multiple_levels_omp.irp.f
+++ b/src/utils/set_multiple_levels_omp.irp.f
@ -0,0 +1,26 @@
+subroutine set_multiple_levels_omp(activate)
+
+  BEGIN_DOC
+! If true, activate OpenMP nested parallelism. If false, deactivate.
+  END_DOC
+
+  implicit none
+  logical, intent(in) :: activate
+
+  if (activate) then
+    call omp_set_max_active_levels(3)
+
+    IRP_IF SET_NESTED
+      call omp_set_nested(.True.)
+    IRP_ENDIF
+
+  else
+
+    call omp_set_max_active_levels(1)
+
+    IRP_IF SET_NESTED
+      call omp_set_nested(.False.)
+    IRP_ENDIF
+  end if
+
+end
--- a/src/utils/util.irp.f
+++ b/src/utils/util.irp.f
@ -328,7 +328,7 @@ BEGIN_PROVIDER [ integer, nproc ]
  ! Number of current OpenMP threads
  END_DOC

-  integer                        :: omp_get_num_threads
+  integer, external              :: omp_get_num_threads
  nproc = 1
  !$OMP PARALLEL
  !$OMP MASTER